Part 3. Program

14. Pipes with magrittr

library(magrittr)
foo_foo <- little_bunny()
## Error in little_bunny(): could not find function "little_bunny"
foo_foo_1 <- hop(foo_foo, through = forest)
## Error in hop(foo_foo, through = forest): could not find function "hop"
foo_foo_2 <- scoop(foo_foo_1, up = field_mice)
## Error in scoop(foo_foo_1, up = field_mice): could not find function "scoop"
foo_foo_3 <- bop(foo_foo_2, on = head)
## Error in bop(foo_foo_2, on = head): could not find function "bop"
diamonds <- ggplot2::diamonds
diamonds2 <- diamonds %>%
  dplyr::mutate(price_per_carat = price / carat)

pryr::object_size(diamonds)
## 3.46 MB
pryr::object_size(diamonds2)
## 3.89 MB
pryr::object_size(diamonds, diamonds2)
## 3.89 MB
diamonds$carat[1] <- NA
pryr::object_size(diamonds)
## 3.46 MB
pryr::object_size(diamonds2)
## 3.89 MB
pryr::object_size(diamonds, diamonds2)
## 4.32 MB
foo_foo <- hop(foo_foo, through = forest)
## Error in hop(foo_foo, through = forest): could not find function "hop"
foo_foo <- scoop(foo_foo, up = field_mice)
## Error in scoop(foo_foo, up = field_mice): could not find function "scoop"
foo_foo <- bop(foo_foo, on = head)
## Error in bop(foo_foo, on = head): could not find function "bop"
bop(
  scoop(
    hop(foo_foo, through = forest), 
    up = field_mice
  ),
  on = head
)
## Error in bop(scoop(hop(foo_foo, through = forest), up = field_mice), on = head): could not find function "bop"
foo_foo %>% 
  hop(through = forest) %>%
  scoop(up = field_mouse) %>%
  bop(on = head)
## Error in eval(lhs, parent, parent): object 'foo_foo' not found
my_pipe <- function(.) {
. <- hop(., through = forest)
. <- scoop(., up = field_mice)
bop(., on = head)
}
my_pipe(foo_foo)
## Error in hop(., through = forest): could not find function "hop"
assign("x", 10)
x
## [1] 10
"x" %>% assign(100)
x
## [1] 10
env <- environment()
"x" %>% assign(100, envir = env)
x
## [1] 100
tryCatch(stop("!"), error = function(e) "An error")
## [1] "An error"
stop("!") %>%
  tryCatch(error = function(e) "An error")
## Error in eval(lhs, parent, parent): !
rnorm(100) %>% 
  matrix(ncol = 2) %>%
  plot() %>%
  str()

##  NULL
rnorm(100) %>%
  matrix(ncol = 2) %T>%
  plot() %>%
  str()

##  num [1:50, 1:2] -0.5 0.1 1.11 -1.54 -1.15 ...
mtcars %$%
  cor(disp, mpg)
## [1] -0.8475514
cor(mtcars$disp, mtcars$mpg)
## [1] -0.8475514
mtcars <- mtcars %>%
  transform(cyl = cyl * 2)
mtcars %<>% transform(cyl = cyl * 2)

15. Functions

df <- tibble::tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)

df$a <- (df$a - min(df$a, na.rm = TRUE)) / 
 (max(df$a, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$b <- (df$b - min(df$b, na.rm = TRUE)) /
(max(df$b, na.rm = TRUE) - min(df$a, na.rm = TRUE))
df$c <- (df$c - min(df$c, na.rm = TRUE)) /
(max(df$c, na.rm = TRUE) - min(df$c, na.rm = TRUE))
df$d <- (df$d - min(df$d, na.rm = TRUE)) /
(max(df$d, na.rm = TRUE) - min(df$d, na.rm = TRUE))
x <- df$a
(x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
##  [1] 0.6134160 1.0000000 0.6556065 0.7257814 0.0000000 0.6314405 0.2565740
##  [8] 0.3184007 0.9185010 0.3450838
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
##  [1] 0.6134160 1.0000000 0.6556065 0.7257814 0.0000000 0.6314405 0.2565740
##  [8] 0.3184007 0.9185010 0.3450838
rescale01 <- function(x){
  rng <- range(x, na.rm=TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
rescale01(c(0,5,10))
## [1] 0.0 0.5 1.0
rescale01(c(-10,0,10))
## [1] 0.0 0.5 1.0
rescale01(c(1,2,3,NA,5))
## [1] 0.00 0.25 0.50   NA 1.00
df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)
x <- c(1:10, Inf)
rescale01(x)
##  [1]   0   0   0   0   0   0   0   0   0   0 NaN
rescale01 <- function(x){
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1])/(rng[2]-rng[1])
}
rescale01(x)
##  [1] 0.0000000 0.1111111 0.2222222 0.3333333 0.4444444 0.5555556 0.6666667
##  [8] 0.7777778 0.8888889 1.0000000       Inf
f()
## Error in f(): could not find function "f"
my_awesome_function()
## Error in my_awesome_function(): could not find function "my_awesome_function"
impute_missing()
## Error in impute_missing(): could not find function "impute_missing"
collapse_years()
## Error in collapse_years(): could not find function "collapse_years"
col_mins <- function(x,y){}
rowMaxes <- function(x,y){}
input_select()
## Error in input_select(): could not find function "input_select"
input_checkbox()
## Error in input_checkbox(): could not find function "input_checkbox"
imput_text()
## Error in imput_text(): could not find function "imput_text"
select_input()
## Error in select_input(): could not find function "select_input"
checkbox_input()
## Error in checkbox_input(): could not find function "checkbox_input"
text_input()
## Error in text_input(): could not find function "text_input"
if(condition){
  # code executed when conditio is TRUE
} else{
  # code executed when condition is FALSE
}
## Error in eval(expr, envir, enclos): object 'condition' not found
has_name <- function(x){
  nms <- name(x)
  if(is.null(nms)){
    rep(FALSE, length(x))
  } else{
    !is.na(nms) & nms!=""
  }
}
if(c(TRUE, FALSE)){}
## Warning in if (c(TRUE, FALSE)) {: the condition has length > 1 and only the
## first element will be used
## NULL
if(NA){}
## Error in if (NA) {: missing value where TRUE/FALSE needed
identical(0L,0)
## [1] FALSE
x <- sqrt(2)^2
x
## [1] 2
x==2
## [1] FALSE
x-2
## [1] 4.440892e-16
if(this){
  # do that
} else if (that){
  # do something else
} else {
  #
}
## Error in eval(expr, envir, enclos): object 'this' not found
function(x,y,op){
  switch(op,
         plus = x + y,
         minus = x - y,
         times = x * y,
         divide = x / y,
         stop("Unknown op!"))
}
## function(x,y,op){
##   switch(op,
##          plus = x + y,
##          minus = x - y,
##          times = x * y,
##          divide = x / y,
##          stop("Unknown op!"))
## }
if(y < 0 && debug){
  message("Y is negative")
}
## Error in eval(expr, envir, enclos): object 'y' not found
if(y == 0){
  log(x)
} else{
  y ^ x
}
## Error in eval(expr, envir, enclos): object 'y' not found
y <- 10
x <- if(y<20) "Too low" else "Too high"
if(y < 20){
  x <- "Too low"
} else{
  x <- "Too high"
}
mean_ci <- function(x, conf = 0.95){
  se <- sd(x) / sqrt(length(x))
  alpha <- 1 - conf
  mean(x) + se * qnorm(c(alpha / 2, 1 - alpha / 2))
}

x <- runif(100)
mean_ci(x)
## [1] 0.3918179 0.5042840
mean_ci(x, conf = 0.99)
## [1] 0.3741482 0.5219537
mean(1:10, na.rm = TRUE)
## [1] 5.5
# x,y,z: vectors
# w: a vector of weights 
# df: a data frame 
# i,j: numeric indices (typically rows and columns)
# n: length, or number of rows
# p: number of columns 
wt_mean <- function(x, w){
  sum(x * w) / sum(x)
}
wt_var <- function(x, w){
  mu <- wt_mean(x, w)
  sum(w * (x - mu) ^ 2) / sum(w)
}
wt_sd <- function(x, w){
  sqrt(wt_var(x, w))
}
wt_mean(1:6, 1:3)
## [1] 2.190476
wt_mean <- function(x, w){
  if(length(x) != length(w)){
    stop("`x` and `w` must be the same length", call = FALSE)
  }
  sum(w*x) / sum(x)
}
wt_mean <- function(x, w, na.rm = FALSE){
  if(!is.logical(na.rm)){
    stop("`na.rm` must be logical")
  }
  if(length(na.rm) != 1){
    stop("`na.rm` must be length 1")
  }
  if(length(x) != length(w)){
    stop("`x` and `w` must be the same length", call.=FALSE)
  }
  if(na.rm){
    miss <- is.na(x) | is.na(w)
    x <- x[!miss]
    w <- w[!miss]
  }
  sum(w*x) / sum(x)
}
wt_mean <- function(x, w, na.rm = FALSE){
  stopifnot(is.logical(na.rm), length(na.rm) == 1)
  stopifnot(length(x)==length(w))
  
  if(na.rm){
    miss <- is.na(x) | is.na(w)
    x <- x[!miss]
    w <- w[!miss]
  }
  sum(w * x) / sum(x)
}
wt_mean(1:6, 6:1, na.rm = "foo")
## Error in wt_mean(1:6, 6:1, na.rm = "foo"): is.logical(na.rm) is not TRUE
sum(1,2,3,4,5,6,7,8,9,10)
## [1] 55
stringr::str_c("a","b","c","d","e","f")
## [1] "abcdef"
commas <- function(...) stringr::str_c(..., collapse = ", ")
commas(letters[1:10])
## [1] "a, b, c, d, e, f, g, h, i, j"
rule <- function(..., pad = "-"){
  title <- paste0(...)
  width <- getOption("width") - nchar(title) - 5
  cat(title, " ", stringr::str_dup(pad, width), "\n", sep="")
}
rule("Important output")
## Important output ------------------------------------------------------
x <- c(1,2)
sum(x, na.mr = TRUE)
## [1] 4
complicated_function <- function(x,y,z){
  if(length(x) == 0 || length(y) == 0){
    return(0)
  }
  # complicated code here
}
f <- function(){
  if(x){
    # Do
    # something
    # that
    # takes 
    # many
    # lines 
    # to
    # express
  }else{
    # return something short 
  }
}
f <- funtion(){
  if(!x){
    return(something_short)
  }
  # Do 
  # something
  # that
  # takes 
  # many 
  # lines 
  # to 
  # express
}
## Error: <text>:1:15: unexpected '{'
## 1: f <- funtion(){
##                   ^
show_missings <- function(df){
  n <- sum(is.na(df))
  cat("Missing values: ", n, "\n", sep = "")
  invisible(df)
}
show_missings(mtcars)
## Missing values: 0
x <- show_missings(mtcars)
## Missing values: 0
class(x)
## [1] "data.frame"
dim(x)
## [1] 32 11
mtcars %>% 
  show_missings() %>%
  mutate(mpg = ifelse(mpg < 20, NA, mpg)) %>%
  show_missings()
## Missing values: 0
## Error in mutate(., mpg = ifelse(mpg < 20, NA, mpg)): could not find function "mutate"
f <- function(x){
  x + y
}
y <- 100
f(10)
## [1] 110
y <- 1000
f(10)
## [1] 1010
`+` <- function(x, y) {
if (runif(1) < 0.1) {
sum(x, y)
} else {
sum(x, y) * 1.1
}
}
table(replicate(1000, 1 + 2))
## 
##   3 3.3 
## 113 887
#>
#> 3 3.3
#> 100 900
rm(`+`)

16. Vectors

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1       ✔ purrr   0.3.2  
## ✔ tibble  2.1.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.3       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract()       masks magrittr::extract()
## ✖ dplyr::filter()        masks stats::filter()
## ✖ .GlobalEnv::has_name() masks tibble::has_name()
## ✖ dplyr::lag()           masks stats::lag()
## ✖ purrr::set_names()     masks magrittr::set_names()
typeof(letters)
## [1] "character"
typeof(1:10)
## [1] "integer"
x <- list("a","b",1:10)
length(x)
## [1] 3
1:10 %% 3 == 0
##  [1] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE
c(TRUE, TRUE, FALSE, NA)
## [1]  TRUE  TRUE FALSE    NA
typeof(1)
## [1] "double"
typeof(1L)
## [1] "integer"
1.5L
## [1] 1.5
x <- sqrt(2) ^ 2
x
## [1] 2
x - 2
## [1] 4.440892e-16
c(-1, 0, 1) / 0
## [1] -Inf  NaN  Inf
is.finite(0)
## [1] TRUE
is.infinite(Inf)
## [1] TRUE
is.na(NA)
## [1] TRUE
is.nan(NaN)
## [1] TRUE
x <- "This is a reasonably long string"
pryr::object_size(x)
## 152 B
y <- rep(x,1000)
pryr::object_size(y)
## 8.14 kB
NA
## [1] NA
NA_integer_
## [1] NA
NA_real_
## [1] NA
NA_character_
## [1] NA
x <- sample(20, 100, replace = TRUE)
y <- x > 10
sum(y)
## [1] 52
mean(y)
## [1] 0.52
typeof(c(TRUE, 1L))
## [1] "integer"
typeof(c(1L, 1.5))
## [1] "double"
typeof(c(1.5, "a"))
## [1] "character"
is_logical()
## Error in is_logical(): argument "x" is missing, with no default
is_integer()
## Error in is_integer(): argument "x" is missing, with no default
is_double()
## Error in is_double(): argument "x" is missing, with no default
is_numeric()
## Warning: Deprecated
## Error in is_integer(x): argument "x" is missing, with no default
is_character()
## Error in is_character(): argument "x" is missing, with no default
is_atomic()
## Error in is_atomic(): argument "x" is missing, with no default
is_list()
## Error in is_list(): argument "x" is missing, with no default
is_vector()
## Error in is_vector(): argument "x" is missing, with no default
sample(10) + 100
##  [1] 106 109 105 103 110 102 107 101 104 108
runif(10) > 0.5
##  [1]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE
1:10 + 1:2
##  [1]  2  4  4  6  6  8  8 10 10 12
1:10 + 1:3
## Warning in 1:10 + 1:3: longer object length is not a multiple of shorter
## object length
##  [1]  2  4  6  5  7  9  8 10 12 11
tibble(x = 1:4,
       y = 1:2)
## Tibble columns must have consistent lengths, only values of length one are recycled:
## * Length 2: Column `y`
## * Length 4: Column `x`
tibble(x = 1:4,
       y = rep(1:2,2))
## # A tibble: 4 x 2
##       x     y
##   <int> <int>
## 1     1     1
## 2     2     2
## 3     3     1
## 4     4     2
tibble(x = 1:4,
       y = rep(1:2, each = 2))
## # A tibble: 4 x 2
##       x     y
##   <int> <int>
## 1     1     1
## 2     2     1
## 3     3     2
## 4     4     2
c(x = 1, y = 2, z = 4)
## x y z 
## 1 2 4
set_names(1:3, c("a","b","c"))
## a b c 
## 1 2 3
x <- c("one","two","three","four","five")
x[c(3,2,5)]
## [1] "three" "two"   "five"
x[c(1,1,5,5,2)]
## [1] "one"  "one"  "five" "five" "two"
x[c(-1, -3, -5)]
## [1] "two"  "four"
x[c(1,-1)]
## Error in x[c(1, -1)]: only 0's may be mixed with negative subscripts
x[0]
## character(0)
x <- c(10,3,NA,5,8,1,NA)
x[!is.na(x)]
## [1] 10  3  5  8  1
x[x %% 2 == 0]
## [1] 10 NA  8 NA
x <- c(abc = 1, def = 2, xyz = 5)
x[c("xyz", "def")]
## xyz def 
##   5   2
x <- list(1,2,3)
x
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 2
## 
## [[3]]
## [1] 3
str(x)
## List of 3
##  $ : num 1
##  $ : num 2
##  $ : num 3
x_names <- list(a = 1, b = 2, c = 3)
str(x_named)
## Error in str(x_named): object 'x_named' not found
y <- list("a",1L,1.5,TRUE)
str(y)
## List of 4
##  $ : chr "a"
##  $ : int 1
##  $ : num 1.5
##  $ : logi TRUE
z <- list(list(1,2),list(3,4))
str(z)
## List of 2
##  $ :List of 2
##   ..$ : num 1
##   ..$ : num 2
##  $ :List of 2
##   ..$ : num 3
##   ..$ : num 4
x1 <- list(c(1,2),c(3,4))
x2 <- list(list(1,2),list(3,4))
x3 <- list(1,list(2,list(3)))
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
str(a[1:2])
## List of 2
##  $ a: int [1:3] 1 2 3
##  $ b: chr "a string"
str(a[4])
## List of 1
##  $ d:List of 2
##   ..$ : num -1
##   ..$ : num -5
str(y[[1]])
##  chr "a"
str(y[[4]])
##  logi TRUE
a$a
## [1] 1 2 3
a[["a"]]
## [1] 1 2 3
x <- 1:10
attr(x, "greeting")
## NULL
attr(x, "greeting")<-"Hi!"
attr(x, "farewell") <- "Bye!"
attributes(x)
## $greeting
## [1] "Hi!"
## 
## $farewell
## [1] "Bye!"
str(x)
##  int [1:10] 1 2 3 4 5 6 7 8 9 10
##  - attr(*, "greeting")= chr "Hi!"
##  - attr(*, "farewell")= chr "Bye!"
as.Date
## function (x, ...) 
## UseMethod("as.Date")
## <bytecode: 0x7ff4b3f5aa28>
## <environment: namespace:base>
methods("as.Date")
## [1] as.Date.character as.Date.default   as.Date.factor    as.Date.numeric  
## [5] as.Date.POSIXct   as.Date.POSIXlt  
## see '?methods' for accessing help and source code
getS3method("as.Date","default")
## function (x, ...) 
## {
##     if (inherits(x, "Date")) 
##         x
##     else if (is.logical(x) && all(is.na(x))) 
##         .Date(as.numeric(x))
##     else stop(gettextf("do not know how to convert '%s' to class %s", 
##         deparse(substitute(x)), dQuote("Date")), domain = NA)
## }
## <bytecode: 0x7ff4b6bd8118>
## <environment: namespace:base>
getS3method("as.Date","numeric")
## function (x, origin, ...) 
## {
##     if (missing(origin)) 
##         stop("'origin' must be supplied")
##     as.Date(origin, ...) + x
## }
## <bytecode: 0x7ff4b6bda118>
## <environment: namespace:base>
x <- factor(c("ab","cd","ab"), levels = c("ab","cd","ef"))
typeof(x)
## [1] "integer"
attributes(x)
## $levels
## [1] "ab" "cd" "ef"
## 
## $class
## [1] "factor"
x <- as.Date("1971-01-01")
unclass(x)
## [1] 365
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "Date"
x <- lubridate::ymd_hm("1970-01-01 01:00")
unclass(x)
## [1] 3600
## attr(,"tzone")
## [1] "UTC"
typeof(x)
## [1] "double"
attributes(x)
## $class
## [1] "POSIXct" "POSIXt" 
## 
## $tzone
## [1] "UTC"
attr(x, "tzone") <- "US/Pacific"
x
## [1] "1969-12-31 17:00:00 PST"
attr(x, "tzone") <- "US/Eastern"
x
## [1] "1969-12-31 20:00:00 EST"
y <- as.POSIXct(x)
typeof(y)
## [1] "double"
attributes(y)
## $class
## [1] "POSIXct" "POSIXt" 
## 
## $tzone
## [1] "US/Eastern"
tb <- tibble::tibble(x = 1:5, y = 5:1)
typeof(tb)
## [1] "list"
attributes(tb)
## $names
## [1] "x" "y"
## 
## $row.names
## [1] 1 2 3 4 5
## 
## $class
## [1] "tbl_df"     "tbl"        "data.frame"
df <- data.frame(x = 1:5, y = 5:1)
typeof(df)
## [1] "list"
attributes(df)
## $names
## [1] "x" "y"
## 
## $class
## [1] "data.frame"
## 
## $row.names
## [1] 1 2 3 4 5

17. Iteration with purrr

library(tidyverse)
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
df
## # A tibble: 10 x 4
##          a      b      c        d
##      <dbl>  <dbl>  <dbl>    <dbl>
##  1  0.719  -1.09  -2.19  -0.283  
##  2 -1.97    0.339  1.85   0.00335
##  3 -0.665   1.27   1.07   0.365  
##  4 -0.0911  0.293  1.00  -0.0918 
##  5 -0.251  -1.79   0.532 -1.43   
##  6  0.803   1.66  -0.673 -0.972  
##  7  1.02   -0.806  0.835 -0.148  
##  8 -0.574  -0.596 -0.248 -2.41   
##  9  0.508   0.187 -2.09   0.392  
## 10 -0.892  -1.07   1.83  -0.0432
median(df$a)
## [1] -0.1712479
median(df$b)
## [1] -0.2043996
median(df$c)
## [1] 0.6833623
median(df$d)
## [1] -0.1200563
output <- vector("double",ncol(df)) # 1. output
for (i in seq_along(df)){           # 2. sequence
  output[[i]] <- median(df[[i]])    # 3. body
}
output
## [1] -0.1712479 -0.2043996  0.6833623 -0.1200563
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
rescale01 <- function(x){
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}

df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)
for (i in seq_along(df)) {
  df[[i]] <- rescale01(df[[i]])
}
for (i in seq_along(x)){
  name <- names(x)[[i]]
  value <- x[[i]]
}
means <- c(0,1,2)

output <- double()
for (i in seq_along(means)){
  n <- sample(100,1)
  output <- c(output, rnorm(n, means[[i]]))
}
str(output)
##  num [1:99] 0.395 -0.583 -1.552 -1.174 -0.559 ...
out <- vector("list", length(means))
for (i in seq_along(means)){
  n <- sample(100,1)
  out[[i]] <- rnorm(n,means[[i]])
}
str(out)
## List of 3
##  $ : num [1:31] 1.4431 0.2276 -0.0045 0.6027 0.2299 ...
##  $ : num [1:12] 0.1253 0.6488 1.304 -0.0971 0.7728 ...
##  $ : num [1:56] 3.295 0.907 1.674 0.627 2.116 ...
str(unlist(out))
##  num [1:99] 1.4431 0.2276 -0.0045 0.6027 0.2299 ...
while(condition){
  # body
}
## Error in eval(expr, envir, enclos): object 'condition' not found
for (i in seq_along(x)){
  # body
}

# Equivalent to
i <- 1
while(i <= length(x)){
  # body
  i <- i + 1
}
flip <- function() sample(c("T","H"),1)

flips <- 0
nheads <- 0

while(nheads < 3) {
  if(flip() == "H"){
    nheads <- nheads + 1
  }else{
    nheads <- 0
  }
  flips <- flips + 1
}
flips
## [1] 4
df <- tibble(
  a = rnorm(10),
  b = rnorm(10),
  c = rnorm(10),
  d = rnorm(10)
)
output <- vector("double", length(df))
for (i in seq_along(df)) {
  output[[i]] <- mean(df[[i]])
}
output
## [1] -0.0675577  0.1918486 -0.2706198  0.2639539
col_mean <- function(df){
  output <- vector("double",length(df))
  for (i in seq_along(df)){
    output[i] <- mean(df[[i]])
  }
  output
}
col_median <- function(df) {
  output <- vector("double", length(df))
  for (i in seq_along(df)) {
    output[i] <- median(df[[i]])
  }
  output
}

col_sd <- function(df) {
  output <- vector("double", length(df))
  for (i in seq_along(df)) {
    output[i] <- sd(df[[i]])
  }
  output
}
f1 <- function(x) abs(x - mean(x)) ^ 1
f2 <- function(x) abs(x - mean(x)) ^ 2
f3 <- function(x) abs(x - mean(x)) ^ 3
f <- function(x, i) abs(x - mean(x)) ^ i
col_summary <- function(df, fun){
  out <- vector("double", length(df))
  for(i in seq_along(df)){
    out[i] <- fun(df[[i]])
  }
  out
}

col_summary(df, median)
## [1] -0.2387807  0.1880906 -0.1266958 -0.1073063
col_summary(df, mean)
## [1] -0.0675577  0.1918486 -0.2706198  0.2639539
map()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_lgl()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_int()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_dbl()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_chr()
## Error in as_mapper(.f, ...): argument ".f" is missing, with no default
map_dbl(df, mean)
##          a          b          c          d 
## -0.0675577  0.1918486 -0.2706198  0.2639539
map_dbl(df, median)
##          a          b          c          d 
## -0.2387807  0.1880906 -0.1266958 -0.1073063
map_dbl(df, sd)
##         a         b         c         d 
## 0.7626600 0.8737739 0.9896946 0.8868068
df %>% map_dbl(mean)
##          a          b          c          d 
## -0.0675577  0.1918486 -0.2706198  0.2639539
df %>% map_dbl(median)
##          a          b          c          d 
## -0.2387807  0.1880906 -0.1266958 -0.1073063
df %>% map_dbl(sd)
##         a         b         c         d 
## 0.7626600 0.8737739 0.9896946 0.8868068
map_dbl(df, mean, trim = 0.5)
##          a          b          c          d 
## -0.2387807  0.1880906 -0.1266958 -0.1073063
z <- list(x = 1:3, y = 4:5)
map_int(z, length)
## x y 
## 3 2
models <- mtcars %>%
  split(.$cyl) %>%
  map(function(df) lm(mpg~wt, data = df))
models
## $`16`
## 
## Call:
## lm(formula = mpg ~ wt, data = df)
## 
## Coefficients:
## (Intercept)           wt  
##      39.571       -5.647  
## 
## 
## $`24`
## 
## Call:
## lm(formula = mpg ~ wt, data = df)
## 
## Coefficients:
## (Intercept)           wt  
##       28.41        -2.78  
## 
## 
## $`32`
## 
## Call:
## lm(formula = mpg ~ wt, data = df)
## 
## Coefficients:
## (Intercept)           wt  
##      23.868       -2.192
models <- mtcars %>%
  split(.$cyl) %>%
  map(~lm(mpg~wt, data=.))
models
## $`16`
## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##      39.571       -5.647  
## 
## 
## $`24`
## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##       28.41        -2.78  
## 
## 
## $`32`
## 
## Call:
## lm(formula = mpg ~ wt, data = .)
## 
## Coefficients:
## (Intercept)           wt  
##      23.868       -2.192
models %>%
  map(summary) %>%
  map_dbl(~.$r.squared)
##        16        24        32 
## 0.5086326 0.4645102 0.4229655
models %>% 
  map(summary) %>%
  map_dbl("r.squared")
##        16        24        32 
## 0.5086326 0.4645102 0.4229655
x <- list(list(1,2,3), list(4,5,6), list(7,8,9))
x %>% map_dbl(2)
## [1] 2 5 8
x1 <- list(
c(0.27, 0.37, 0.57, 0.91, 0.20),
c(0.90, 0.94, 0.66, 0.63, 0.06),
c(0.21, 0.18, 0.69, 0.38, 0.77)
)
x2 <- list(
c(0.50, 0.72, 0.99, 0.38, 0.78),
c(0.93, 0.21, 0.65, 0.13, 0.27),
c(0.39, 0.01, 0.38, 0.87, 0.34)
)
threshold <- function(x, cutoff = 0.8) x[x > cutoff]
x1 %>% sapply(threshold) %>% str()
## List of 3
##  $ : num 0.91
##  $ : num [1:2] 0.9 0.94
##  $ : num(0)
#> List of 3
#> $ : num 0.91
#> $ : num [1:2] 0.9 0.94
#> $ : num(0)
x2 %>% sapply(threshold) %>% str()
##  num [1:3] 0.99 0.93 0.87
#> num [1:3] 0.99 0.93 0.87
safe_log <- safely(log)
str(safe_log(10))
## List of 2
##  $ result: num 2.3
##  $ error : NULL
str(safe_log("a"))
## List of 2
##  $ result: NULL
##  $ error :List of 2
##   ..$ message: chr "non-numeric argument to mathematical function"
##   ..$ call   : language .Primitive("log")(x, base)
##   ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
x <- list(1,10,"a")
y <- x %>% map(safely(log))
str(y)
## List of 3
##  $ :List of 2
##   ..$ result: num 0
##   ..$ error : NULL
##  $ :List of 2
##   ..$ result: num 2.3
##   ..$ error : NULL
##  $ :List of 2
##   ..$ result: NULL
##   ..$ error :List of 2
##   .. ..$ message: chr "non-numeric argument to mathematical function"
##   .. ..$ call   : language .Primitive("log")(x, base)
##   .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
y <- y %>% transpose()
str(y)
## List of 2
##  $ result:List of 3
##   ..$ : num 0
##   ..$ : num 2.3
##   ..$ : NULL
##  $ error :List of 3
##   ..$ : NULL
##   ..$ : NULL
##   ..$ :List of 2
##   .. ..$ message: chr "non-numeric argument to mathematical function"
##   .. ..$ call   : language .Primitive("log")(x, base)
##   .. ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
is_ok <- y$error %>% map_lgl(is_null)
x[!is_ok]
## [[1]]
## [1] "a"
y$result[is_ok] %>% flatten_dbl
## [1] 0.000000 2.302585
x <- list(1,10,"a")
x %>% map_dbl(possibly(log, NA_real_))
## [1] 0.000000 2.302585       NA
x <- list(1,-1)
x %>% map(quietly(log)) %>% str()
## List of 2
##  $ :List of 4
##   ..$ result  : num 0
##   ..$ output  : chr ""
##   ..$ warnings: chr(0) 
##   ..$ messages: chr(0) 
##  $ :List of 4
##   ..$ result  : num NaN
##   ..$ output  : chr ""
##   ..$ warnings: chr "NaNs produced"
##   ..$ messages: chr(0)
mu <- list(5,10,-3)
mu %>% 
  map(rnorm, n = 5) %>%
  str()
## List of 3
##  $ : num [1:5] 5.21 4.08 6.15 7.32 5.16
##  $ : num [1:5] 8.51 10.13 9.78 11.09 11.39
##  $ : num [1:5] -4.21 -2.91 -3.71 -3.66 -2.05
sigma <- list(1,5,10)
seq_along(mu) %>%
  map(~rnorm(5,mu[[.]],sigma[[.]])) %>%
  str()
## List of 3
##  $ : num [1:5] 5.7 5.8 7.05 5.86 3.68
##  $ : num [1:5] 10.39 9.37 16.63 11.44 8.8
##  $ : num [1:5] -21.99 -7.82 -12.81 -1.1 5.86
map2(mu, sigma, rnorm, n = 5) %>% str()
## List of 3
##  $ : num [1:5] 4.55 5.53 5.75 6.23 4.18
##  $ : num [1:5] 6.57 13.65 15.94 17.59 21.55
##  $ : num [1:5] -9.57 -5.38 19.52 -6.89 -11.84
mu %>% 
  map2(sigma, rnorm, n = 5) %>%
  str()
## List of 3
##  $ : num [1:5] 4.6 3.8 4.36 3.73 5.9
##  $ : num [1:5] 15.19 15.183 0.137 2.734 13.938
##  $ : num [1:5] 15.72 -8.55 -6.61 -20.83 -5.96
map2 <- function(x,y,f,...){
  out <- vector("list",length(x))
  for(i in seq_along(x)){
    out[[i]]<-f(x[[i]],y[[i]],...)
  }
  out
}
n <- list(1,3,5)
args1 <- list(n, mu, sigma)
args1 %>% 
  pmap(rnorm) %>%
  str()
## List of 3
##  $ : num 5.82
##  $ : num [1:3] 6.91 9.43 9.67
##  $ : num [1:5] -3.95 -10.32 -8.87 -5.97 -3.42
args2 <- list(mean = mu, sd = sigma, n = n)
args2 %>% 
  pmap(rnorm) %>%
  str()
## List of 3
##  $ : num 4.47
##  $ : num [1:3] 5.59 14.99 13.79
##  $ : num [1:5] -15.99 -15.84 11.95 -1.27 4.96
params <- tribble(
  ~mean, ~sd, ~n,
  5,1,1,
  10, 5, 3,
  -3, 10, 5
)
params %>%
  pmap(rnorm)
## [[1]]
## [1] 4.271721
## 
## [[2]]
## [1]  5.222806 13.321636  7.254072
## 
## [[3]]
## [1]  -5.17704 -11.02482 -19.16702  11.46873  13.57893
args(rnorm)
## function (n, mean = 0, sd = 1) 
## NULL
f <- c("runif","rnorm","rpois")
param <- list(
  list(min = -1, max = 1),
  list(sd = 5),
  list(lambda = 10)
)
invoke_map(f,param,n=5) %>% str()
## List of 3
##  $ : num [1:5] 0.799 -0.0709 -0.6819 0.8252 0.9747
##  $ : num [1:5] -2.81 7.53 -4.29 2.92 4.51
##  $ : int [1:5] 4 10 8 17 17
sim <- tribble(
  ~f, ~params,
  "runif", list(min = 1, max = 1),
  "rnorm", list(sd = 5),
  "rpois", list(lambda = 10)
)
sim %>% mutate(sim = invoke_map(f,params,n=10))
## # A tibble: 3 x 3
##   f     params     sim       
##   <chr> <list>     <list>    
## 1 runif <list [2]> <dbl [10]>
## 2 rnorm <list [1]> <dbl [10]>
## 3 rpois <list [1]> <int [10]>
x <- list(1,"a",3)
x %>%
  walk(print)
## [1] 1
## [1] "a"
## [1] 3
library(ggplot2)
plots <- mtcars %>%
  split(.$cyl) %>%
  map(~ggplot(.,aes(mpg,wt))+geom_point())
paths <- stringr::str_c(names(plots),".pdf")

pwalk(list(paths,plots),ggsave,path = tempdir())
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
iris %>%
  keep(is.factor) %>%
  str()
## 'data.frame':    150 obs. of  1 variable:
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
iris %>%
  discard(is.factor) %>% 
  str()
## 'data.frame':    150 obs. of  4 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
x <- list(1:5, letters, list(10))
x %>%
  some(is_character)
## [1] TRUE
x %>% 
  every(is_vector)
## [1] TRUE
x <- sample(10)
x
##  [1]  2  4  5  3  9  1  7  6  8 10
x %>% 
  detect(~ .>5)
## [1] 9
x %>%
  detect_index(~ .>5)
## [1] 5
x %>%
  head_while(~. >5)
## integer(0)
x %>%
  tail_while(~. >5)
## [1]  7  6  8 10
dfs <- list(
  age = tibble(name = "John", age = 30),
  sex = tibble(name = c("John", "Mary"), sex = c("M","F")),
  trt = tibble(name = "Mary", treatment = "A")
)

dfs %>% reduce(full_join)
## Joining, by = "name"
## Joining, by = "name"
## # A tibble: 2 x 4
##   name    age sex   treatment
##   <chr> <dbl> <chr> <chr>    
## 1 John     30 M     <NA>     
## 2 Mary     NA F     A
vs <- list(
  c(1,3,5,6,10),
  c(1,2,3,7,8,10),
  c(1,2,3,4,8,9,10)
)
vs %>% reduce(intersect)
## [1]  1  3 10
x <- sample(10)
x
##  [1]  8  9  2 10  3  1  5  6  7  4
x %>% accumulate(`+`)
##  [1]  8 17 19 29 32 33 38 44 51 55
x %>% reduce(`+`)
## [1] 55

Part 4. Model

18. Model basics with modelr

library(tidyverse)
library(modelr)
options(na.action = na.warn)
ggplot(sim1, aes(x,y)) + geom_point()

models <- tibble(
  a1 = runif(250, -20, 40),
  a2 = runif(250,-5,5)
)
ggplot(sim1, aes(x,y)) + 
  geom_abline(
    aes(intercept = a1, slope = a2),
    data = models, alpha = 1/4
  ) + 
  geom_point()

model1 <- function(a, data){
  a[1] + data$x * a[2]
}
model1(c(7,1.5), sim1)
##  [1]  8.5  8.5  8.5 10.0 10.0 10.0 11.5 11.5 11.5 13.0 13.0 13.0 14.5 14.5
## [15] 14.5 16.0 16.0 16.0 17.5 17.5 17.5 19.0 19.0 19.0 20.5 20.5 20.5 22.0
## [29] 22.0 22.0
measure_distance <- function(mod, data){
  diff <- data$y - model1(mod, data)
  sqrt(mean(diff^2))
}
measure_distance(c(7,1.5),sim1)
## [1] 2.665212
sim1_dist <- function(a1,a2){
  measure_distance(c(a1,a2),sim1)
}
models <- models %>%
  mutate(dist = purrr::map2_dbl(a1,a2,sim1_dist))
models
## # A tibble: 250 x 3
##         a1     a2  dist
##      <dbl>  <dbl> <dbl>
##  1 -11.8    3.28  10.1 
##  2   0.983  2.19   3.27
##  3  31.0   -4.11  19.2 
##  4  22.1    2.53  20.6 
##  5  34.1    1.12  25.0 
##  6  16.0    4.80  28.1 
##  7 -12.2    1.62  18.9 
##  8 -12.1   -4.13  53.4 
##  9 -15.7    0.822 27.0 
## 10  16.3   -1.57  13.2 
## # … with 240 more rows
ggplot(sim1, aes(x,y))+
  geom_point(size = 2, color = "grey30") + 
  geom_abline(
    aes(intercept = a1, slope = a2, color = -dist),
    data = filter(models, rank(dist)<=10)
  )

ggplot(models, aes(a1,a2))+
  geom_point(
    data = filter(models, rank(dist) <= 10),
    size = 4, color = "red"
  )+ 
  geom_point(aes(colour = -dist))

grid <- expand.grid(
  a1 = seq(-5,20,length = 25),
  a2 = seq(1,3,length = 25)
) %>%
  mutate(dist = purrr::map2_dbl(a1,a2,sim1_dist))

grid
##             a1       a2      dist
## 1   -5.0000000 1.000000 15.452475
## 2   -3.9583333 1.000000 14.443171
## 3   -2.9166667 1.000000 13.438807
## 4   -1.8750000 1.000000 12.440580
## 5   -0.8333333 1.000000 11.450094
## 6    0.2083333 1.000000 10.469547
## 7    1.2500000 1.000000  9.502017
## 8    2.2916667 1.000000  8.551921
## 9    3.3333333 1.000000  7.625781
## 10   4.3750000 1.000000  6.733488
## 11   5.4166667 1.000000  5.890443
## 12   6.4583333 1.000000  5.121026
## 13   7.5000000 1.000000  4.463479
## 14   8.5416667 1.000000  3.973729
## 15   9.5833333 1.000000  3.718673
## 16  10.6250000 1.000000  3.746556
## 17  11.6666667 1.000000  4.051540
## 18  12.7083333 1.000000  4.578581
## 19  13.7500000 1.000000  5.261366
## 20  14.7916667 1.000000  6.047370
## 21  15.8333333 1.000000  6.901415
## 22  16.8750000 1.000000  7.801187
## 23  17.9166667 1.000000  8.732562
## 24  18.9583333 1.000000  9.686429
## 25  20.0000000 1.000000 10.656749
## 26  -5.0000000 1.083333 14.961504
## 27  -3.9583333 1.083333 13.950902
## 28  -2.9166667 1.083333 12.945226
## 29  -1.8750000 1.083333 11.945720
## 30  -0.8333333 1.083333 10.954072
## 31   0.2083333 1.083333  9.972629
## 32   1.2500000 1.083333  9.004726
## 33   2.2916667 1.083333  8.055246
## 34   3.3333333 1.083333  7.131552
## 35   4.3750000 1.083333  6.245095
## 36   5.4166667 1.083333  5.414197
## 37   6.4583333 1.083333  4.668617
## 38   7.5000000 1.083333  4.055685
## 39   8.5416667 1.083333  3.642982
## 40   9.5833333 1.083333  3.502027
## 41  10.6250000 1.083333  3.664315
## 42  11.6666667 1.083333  4.093941
## 43  12.7083333 1.083333  4.718437
## 44  13.7500000 1.083333  5.471478
## 45  14.7916667 1.083333  6.307190
## 46  15.8333333 1.083333  7.196829
## 47  16.8750000 1.083333  8.122696
## 48  17.9166667 1.083333  9.073708
## 49  18.9583333 1.083333 10.042724
## 50  20.0000000 1.083333 11.024998
## 51  -5.0000000 1.166667 14.472350
## 52  -3.9583333 1.166667 13.460492
## 53  -2.9166667 1.166667 12.453550
## 54  -1.8750000 1.166667 11.452822
## 55  -0.8333333 1.166667 10.460090
## 56   0.2083333 1.166667  9.477867
## 57   1.2500000 1.166667  8.509793
## 58   2.2916667 1.166667  7.561306
## 59   3.3333333 1.166667  6.640802
## 60   4.3750000 1.166667  5.761709
## 61   5.4166667 1.166667  4.946157
## 62   6.4583333 1.166667  4.231050
## 63   7.5000000 1.166667  3.675492
## 64   8.5416667 1.166667  3.359589
## 65   9.5833333 1.166667  3.351801
## 66  10.6250000 1.166667  3.654100
## 67  11.6666667 1.166667  4.200055
## 68  12.7083333 1.166667  4.909034
## 69  13.7500000 1.166667  5.720743
## 70  14.7916667 1.166667  6.597373
## 71  15.8333333 1.166667  7.516242
## 72  16.8750000 1.166667  8.463605
## 73  17.9166667 1.166667  9.430878
## 74  18.9583333 1.166667 10.412514
## 75  20.0000000 1.166667 11.404804
## 76  -5.0000000 1.250000 13.985205
## 77  -3.9583333 1.250000 12.972153
## 78  -2.9166667 1.250000 11.964016
## 79  -1.8750000 1.250000 10.962151
## 80  -0.8333333 1.250000  9.968448
## 81   0.2083333 1.250000  8.985617
## 82   1.2500000 1.250000  8.017655
## 83   2.2916667 1.250000  7.070673
## 84   3.3333333 1.250000  6.154363
## 85   4.3750000 1.250000  5.284703
## 86   5.4166667 1.250000  4.488889
## 87   6.4583333 1.250000  3.813437
## 88   7.5000000 1.250000  3.332360
## 89   8.5416667 1.250000  3.136412
## 90   9.5833333 1.250000  3.277145
## 91  10.6250000 1.250000  3.716505
## 92  11.6666667 1.250000  4.365236
## 93  12.7083333 1.250000  5.144735
## 94  13.7500000 1.250000  6.004286
## 95  14.7916667 1.250000  6.914097
## 96  15.8333333 1.250000  7.856728
## 97  16.8750000 1.250000  8.821663
## 98  17.9166667 1.250000  9.802318
## 99  18.9583333 1.250000 10.794410
## 100 20.0000000 1.250000 11.795053
## 101 -5.0000000 1.333333 13.500287
## 102 -3.9583333 1.333333 12.486128
## 103 -2.9166667 1.333333 11.476898
## 104 -1.8750000 1.333333 10.474021
## 105 -0.8333333 1.333333  9.479514
## 106  0.2083333 1.333333  8.496316
## 107  1.2500000 1.333333  7.528860
## 108  2.2916667 1.333333  6.584088
## 109  3.3333333 1.333333  5.673345
## 110  4.3750000 1.333333  4.815974
## 111  5.4166667 1.333333  4.046048
## 112  6.4583333 1.333333  3.423090
## 113  7.5000000 1.333333  3.038869
## 114  8.5416667 1.333333  2.986979
## 115  9.5833333 1.333333  3.283215
## 116 10.6250000 1.333333  3.847999
## 117 11.6666667 1.333333  4.583103
## 118 12.7083333 1.333333  5.419659
## 119 13.7500000 1.333333  6.317493
## 120 14.7916667 1.333333  7.253887
## 121 15.8333333 1.333333  8.215666
## 122 16.8750000 1.333333  9.194868
## 123 17.9166667 1.333333 10.186470
## 124 18.9583333 1.333333 11.187174
## 125 20.0000000 1.333333 12.194741
## 126 -5.0000000 1.416667 13.017843
## 127 -3.9583333 1.416667 12.002697
## 128 -2.9166667 1.416667 10.992516
## 129 -1.8750000 1.416667  9.988803
## 130 -0.8333333 1.416667  8.993727
## 131  0.2083333 1.416667  8.010505
## 132  1.2500000 1.416667  7.044103
## 133  2.2916667 1.416667  6.102519
## 134  3.3333333 1.416667  5.199252
## 135  4.3750000 1.416667  4.358193
## 136  5.4166667 1.416667  3.622929
## 137  6.4583333 1.416667  3.070425
## 138  7.5000000 1.416667  2.810614
## 139  8.5416667 1.416667  2.922624
## 140  9.5833333 1.416667  3.369577
## 141 10.6250000 1.416667  4.041845
## 142 11.6666667 1.416667  4.846556
## 143 12.7083333 1.416667  5.728162
## 144 13.7500000 1.416667  6.656179
## 145 14.7916667 1.416667  7.613654
## 146 15.8333333 1.416667  8.590744
## 147 16.8750000 1.416667  9.581449
## 148 17.9166667 1.416667 10.581947
## 149 18.9583333 1.416667 11.589701
## 150 20.0000000 1.416667 12.602971
## 151 -5.0000000 1.500000 12.538160
## 152 -3.9583333 1.500000 11.522188
## 153 -2.9166667 1.500000 10.511248
## 154 -1.8750000 1.500000  9.506944
## 155 -0.8333333 1.500000  8.511626
## 156  0.2083333 1.500000  7.528858
## 157  1.2500000 1.500000  6.564280
## 158  2.2916667 1.500000  5.627253
## 159  3.3333333 1.500000  4.734166
## 160  4.3750000 1.500000  3.915203
## 161  5.4166667 1.500000  3.227296
## 162  6.4583333 1.500000  2.769874
## 163  7.5000000 1.500000  2.664414
## 164  8.5416667 1.500000  2.948922
## 165  9.5833333 1.500000  3.530343
## 166 10.6250000 1.500000  4.289597
## 167 11.6666667 1.500000  5.148601
## 168 12.7083333 1.500000  6.065121
## 169 13.7500000 1.500000  7.016654
## 170 14.7916667 1.500000  7.990701
## 171 15.8333333 1.500000  8.979940
## 172 16.8750000 1.500000  9.979853
## 173 17.9166667 1.500000 10.987527
## 174 18.9583333 1.500000 12.001008
## 175 20.0000000 1.500000 13.018938
## 176 -5.0000000 1.583333 12.061566
## 177 -3.9583333 1.583333 11.044982
## 178 -2.9166667 1.583333 10.033543
## 179 -1.8750000 1.583333  9.028981
## 180 -0.8333333 1.583333  8.033876
## 181  0.2083333 1.583333  7.052230
## 182  1.2500000 1.583333  6.090556
## 183  2.2916667 1.583333  5.160034
## 184  3.3333333 1.583333  4.281023
## 185  4.3750000 1.583333  3.492635
## 186  5.4166667 1.583333  2.870537
## 187  6.4583333 1.583333  2.540002
## 188  7.5000000 1.583333  2.614072
## 189  8.5416667 1.583333  3.063539
## 190  9.5833333 1.583333  3.755970
## 191 10.6250000 1.583333  4.582520
## 192 11.6666667 1.583333  5.482865
## 193 12.7083333 1.583333  6.426062
## 194 13.7500000 1.583333  7.395733
## 195 14.7916667 1.583333  8.382697
## 196 15.8333333 1.583333  9.381496
## 197 16.8750000 1.583333 10.388719
## 198 17.9166667 1.583333 11.402133
## 199 18.9583333 1.583333 12.420223
## 200 20.0000000 1.583333 13.441925
## 201 -5.0000000 1.666667 11.588444
## 202 -3.9583333 1.666667 10.571525
## 203 -2.9166667 1.666667  9.559936
## 204 -1.8750000 1.666667  8.555568
## 205 -0.8333333 1.666667  7.561300
## 206  0.2083333 1.666667  6.581711
## 207  1.2500000 1.666667  5.624474
## 208  2.2916667 1.666667  4.703258
## 209  3.3333333 1.666667  3.844048
## 210  4.3750000 1.666667  3.098856
## 211  5.4166667 1.666667  2.568902
## 212  6.4583333 1.666667  2.401196
## 213  7.5000000 1.666667  2.665026
## 214  8.5416667 1.666667  3.257166
## 215  9.5833333 1.666667  4.035595
## 216 10.6250000 1.666667  4.912542
## 217 11.6666667 1.666667  5.843821
## 218 12.7083333 1.666667  6.807170
## 219 13.7500000 1.666667  7.790701
## 220 14.7916667 1.666667  8.787640
## 221 15.8333333 1.666667  9.793894
## 222 16.8750000 1.666667 10.806860
## 223 17.9166667 1.666667 11.824815
## 224 18.9583333 1.666667 12.846571
## 225 20.0000000 1.666667 13.871290
## 226 -5.0000000 1.750000 11.119237
## 227 -3.9583333 1.750000 10.102345
## 228 -2.9166667 1.750000  9.091066
## 229 -1.8750000 1.750000  8.087504
## 230 -0.8333333 1.750000  7.094934
## 231  0.2083333 1.750000  6.118709
## 232  1.2500000 1.750000  5.168100
## 233  2.2916667 1.750000  4.260287
## 234  3.3333333 1.750000  3.429427
## 235  4.3750000 1.750000  2.746278
## 236  5.4166667 1.750000  2.343768
## 237  6.4583333 1.750000  2.369514
## 238  7.5000000 1.750000  2.811775
## 239  8.5416667 1.750000  3.516775
## 240  9.5833333 1.750000  4.358838
## 241 10.6250000 1.750000  5.272700
## 242 11.6666667 1.750000  6.226830
## 243 12.7083333 1.750000  7.205247
## 244 13.7500000 1.750000  8.199263
## 245 14.7916667 1.750000  9.203823
## 246 15.8333333 1.750000 10.215819
## 247 16.8750000 1.750000 11.233241
## 248 17.9166667 1.750000 12.254737
## 249 18.9583333 1.750000 13.279367
## 250 20.0000000 1.750000 14.306458
## 251 -5.0000000 1.833333 10.654461
## 252 -3.9583333 1.833333  9.638068
## 253 -2.9166667 1.833333  8.627706
## 254 -1.8750000 1.833333  7.625772
## 255 -0.8333333 1.833333  6.636086
## 256  0.2083333 1.833333  5.665069
## 257  1.2500000 1.833333  4.724248
## 258  2.2916667 1.833333  3.835906
## 259  3.3333333 1.833333  3.046304
## 260  4.3750000 1.833333  2.452732
## 261  5.4166667 1.833333  2.218550
## 262  6.4583333 1.833333  2.449116
## 263  7.5000000 1.833333  3.040480
## 264  8.5416667 1.833333  3.828969
## 265  9.5833333 1.833333  4.716739
## 266 10.6250000 1.833333  5.657242
## 267 11.6666667 1.833333  6.628068
## 268 12.7083333 1.833333  7.617633
## 269 13.7500000 1.833333  8.619484
## 270 14.7916667 1.833333  9.629789
## 271 15.8333333 1.833333 10.646139
## 272 16.8750000 1.833333 11.666957
## 273 17.9166667 1.833333 12.691163
## 274 18.9583333 1.833333 13.717999
## 275 20.0000000 1.833333 14.746915
## 276 -5.0000000 1.916667 10.194722
## 277 -3.9583333 1.916667  9.179435
## 278 -2.9166667 1.916667  8.170793
## 279 -1.8750000 1.916667  7.171597
## 280 -0.8333333 1.916667  6.186429
## 281  0.2083333 1.916667  5.223231
## 282  1.2500000 1.916667  4.296803
## 283  2.2916667 1.916667  3.437009
## 284  3.3333333 1.916667  2.708077
## 285  4.3750000 1.916667  2.241533
## 286  5.4166667 1.916667  2.210294
## 287  6.4583333 1.916667  2.629918
## 288  7.5000000 1.916667  3.334318
## 289  8.5416667 1.916667  4.181988
## 290  9.5833333 1.916667  5.102010
## 291 10.6250000 1.916667  6.061529
## 292 11.6666667 1.916667  7.044423
## 293 12.7083333 1.916667  8.042126
## 294 13.7500000 1.916667  9.049742
## 295 14.7916667 1.916667 10.064294
## 296 15.8333333 1.916667 11.083877
## 297 16.8750000 1.916667 12.107221
## 298 17.9166667 1.916667 13.133445
## 299 18.9583333 1.916667 14.161925
## 300 20.0000000 1.916667 15.192202
## 301 -5.0000000 2.000000  9.740734
## 302 -3.9583333 2.000000  8.727339
## 303 -2.9166667 2.000000  7.721472
## 304 -1.8750000 2.000000  6.726510
## 305 -0.8333333 2.000000  5.748121
## 306  0.2083333 2.000000  4.796457
## 307  1.2500000 2.000000  3.891173
## 308  2.2916667 2.000000  3.073533
## 309  3.3333333 2.000000  2.433540
## 310  4.3750000 2.000000  2.137234
## 311  5.4166667 2.000000  2.320250
## 312  6.4583333 2.000000  2.893007
## 313  7.5000000 2.000000  3.677711
## 314  8.5416667 2.000000  4.566373
## 315  9.5833333 2.000000  5.508912
## 316 10.6250000 2.000000  6.481867
## 317 11.6666667 2.000000  7.473367
## 318 12.7083333 2.000000  8.476909
## 319 13.7500000 2.000000  9.488671
## 320 14.7916667 2.000000 10.506280
## 321 15.8333333 2.000000 11.528187
## 322 16.8750000 2.000000 12.553343
## 323 17.9166667 2.000000 13.581012
## 324 18.9583333 2.000000 14.610663
## 325 20.0000000 2.000000 15.641906
## 326 -5.0000000 2.083333  9.293340
## 327 -3.9583333 2.083333  8.282848
## 328 -2.9166667 2.083333  7.281148
## 329 -1.8750000 2.083333  6.292440
## 330 -0.8333333 2.083333  5.323966
## 331  0.2083333 2.083333  4.389142
## 332  1.2500000 2.083333  3.514921
## 333  2.2916667 2.083333  2.759511
## 334  3.3333333 2.083333  2.246169
## 335  4.3750000 2.083333  2.155409
## 336  5.4166667 2.083333  2.533070
## 337  6.4583333 2.083333  3.218265
## 338  7.5000000 2.083333  4.058098
## 339  8.5416667 2.083333  4.974860
## 340  9.5833333 2.083333  5.932996
## 341 10.6250000 2.083333  6.915330
## 342 11.6666667 2.083333  7.912855
## 343 12.7083333 2.083333  8.920476
## 344 13.7500000 2.083333  9.935122
## 345 14.7916667 2.083333 10.954842
## 346 15.8333333 2.083333 11.978339
## 347 16.8750000 2.083333 13.004721
## 348 17.9166667 2.083333 14.033357
## 349 18.9583333 2.083333 15.063783
## 350 20.0000000 2.083333 16.095656
## 351 -5.0000000 2.166667  8.853540
## 352 -3.9583333 2.166667  7.847256
## 353 -2.9166667 2.166667  6.851557
## 354 -1.8750000 2.166667  5.871829
## 355 -0.8333333 2.166667  4.917627
## 356  0.2083333 2.166667  4.007227
## 357  1.2500000 2.166667  3.178494
## 358  2.2916667 2.166667  2.513548
## 359  3.3333333 2.166667  2.168677
## 360  4.3750000 2.166667  2.293149
## 361  5.4166667 2.166667  2.825605
## 362  6.4583333 2.166667  3.588829
## 363  7.5000000 2.166667  4.466037
## 364  8.5416667 2.166667  5.401983
## 365  9.5833333 2.166667  6.370831
## 366 10.6250000 2.166667  7.359599
## 367 11.6666667 2.166667  8.361222
## 368 12.7083333 2.166667  9.371581
## 369 13.7500000 2.166667 10.388125
## 370 14.7916667 2.166667 11.409203
## 371 15.8333333 2.166667 12.433697
## 372 16.8750000 2.166667 13.460827
## 373 17.9166667 2.166667 14.490032
## 374 18.9583333 2.166667 15.520900
## 375 20.0000000 2.166667 16.553121
## 376 -5.0000000 2.250000  8.422522
## 377 -3.9583333 2.250000  7.422129
## 378 -2.9166667 2.250000  6.434848
## 379 -1.8750000 2.250000  5.467785
## 380 -0.8333333 2.250000  4.533896
## 381  0.2083333 2.250000  3.658673
## 382  1.2500000 2.250000  2.895809
## 383  2.2916667 2.250000  2.357046
## 384  3.3333333 2.250000  2.212637
## 385  4.3750000 2.250000  2.531007
## 386  5.4166667 2.250000  3.175905
## 387  6.4583333 2.250000  3.992103
## 388  7.5000000 2.250000  4.894644
## 389  8.5416667 2.250000  5.843657
## 390  9.5833333 2.250000  6.819769
## 391 10.6250000 2.250000  7.812831
## 392 11.6666667 2.250000  8.817116
## 393 12.7083333 2.250000  9.829185
## 394 13.7500000 2.250000 10.846860
## 395 14.7916667 2.250000 11.868698
## 396 15.8333333 2.250000 12.893710
## 397 16.8750000 2.250000 13.921194
## 398 17.9166667 2.250000 14.950642
## 399 18.9583333 2.250000 15.981673
## 400 20.0000000 2.250000 17.014000
## 401 -5.0000000 2.333333  8.001707
## 402 -3.9583333 2.333333  7.009373
## 403 -2.9166667 2.333333  6.033691
## 404 -1.8750000 2.333333  5.084259
## 405 -0.8333333 2.333333  4.179006
## 406  0.2083333 2.333333  3.353898
## 407  1.2500000 2.333333  2.683899
## 408  2.2916667 2.333333  2.308274
## 409  3.3333333 2.333333  2.371305
## 410  4.3750000 2.333333  2.843973
## 411  5.4166667 2.333333  3.566990
## 412  6.4583333 2.333333  4.419139
## 413  7.5000000 2.333333  5.338942
## 414  8.5416667 2.333333  6.296821
## 415  9.5833333 2.333333  7.277757
## 416 10.6250000 2.333333  8.273553
## 417 11.6666667 2.333333  9.279426
## 418 12.7083333 2.333333 10.292422
## 419 13.7500000 2.333333 11.310628
## 420 14.7916667 2.333333 12.332753
## 421 15.8333333 2.333333 13.357897
## 422 16.8750000 2.333333 14.385415
## 423 17.9166667 2.333333 15.414833
## 424 18.9583333 2.333333 16.445793
## 425 20.0000000 2.333333 17.478023
## 426 -5.0000000 2.416667  7.592791
## 427 -3.9583333 2.416667  6.611303
## 428 -2.9166667 2.416667  5.651399
## 429 -1.8750000 2.416667  4.726249
## 430 -0.8333333 2.416667  3.860919
## 431  0.2083333 2.416667  3.105817
## 432  1.2500000 2.416667  2.560398
## 433  2.2916667 2.416667  2.373882
## 434  3.3333333 2.416667  2.623954
## 435  4.3750000 2.416667  3.210155
## 436  5.4166667 2.416667  3.986877
## 437  6.4583333 2.416667  4.863684
## 438  7.5000000 2.416667  5.795326
## 439  8.5416667 2.416667  6.759165
## 440  9.5833333 2.416667  7.743188
## 441 10.6250000 2.416667  8.740581
## 442 11.6666667 2.416667  9.747240
## 443 12.7083333 2.416667 10.760565
## 444 13.7500000 2.416667 11.778835
## 445 14.7916667 2.416667 12.800871
## 446 15.8333333 2.416667 13.825838
## 447 16.8750000 2.416667 14.853128
## 448 17.9166667 2.416667 15.882291
## 449 18.9583333 2.416667 16.912985
## 450 20.0000000 2.416667 17.944947
## 451 -5.0000000 2.500000  7.197802
## 452 -3.9583333 2.500000  6.230736
## 453 -2.9166667 2.500000  5.292061
## 454 -1.8750000 2.500000  4.399988
## 455 -0.8333333 2.500000  3.589432
## 456  0.2083333 2.500000  2.928871
## 457  1.2500000 2.500000  2.538245
## 458  2.2916667 2.500000  2.545040
## 459  3.3333333 2.500000  2.946508
## 460  4.3750000 2.500000  3.613409
## 461  5.4166667 2.500000  4.427379
## 462  6.4583333 2.500000  5.321351
## 463  7.5000000 2.500000  6.261151
## 464  8.5416667 2.500000  7.228927
## 465  9.5833333 2.500000  8.214798
## 466 10.6250000 2.500000  9.212956
## 467 11.6666667 2.500000 10.219801
## 468 12.7083333 2.500000 11.232999
## 469 13.7500000 2.500000 12.250973
## 470 14.7916667 2.500000 13.272624
## 471 15.8333333 2.500000 14.297164
## 472 16.8750000 2.500000 15.324013
## 473 17.9166667 2.500000 16.352737
## 474 18.9583333 2.500000 17.383002
## 475 20.0000000 2.500000 18.414550
## 476 -5.0000000 2.583333  6.819161
## 477 -3.9583333 2.583333  5.871076
## 478 -2.9166667 2.583333  4.960669
## 479 -1.8750000 2.583333  4.113038
## 480 -0.8333333 2.583333  3.375807
## 481  0.2083333 2.583333  2.836405
## 482  1.2500000 2.583333  2.620011
## 483  2.2916667 2.583333  2.802474
## 484  3.3333333 2.583333  3.318644
## 485  4.3750000 2.583333  4.042657
## 486  5.4166667 2.583333  4.882919
## 487  6.4583333 2.583333  5.789029
## 488  7.5000000 2.583333  6.734460
## 489  8.5416667 2.583333  7.704751
## 490  9.5833333 2.583333  8.691580
## 491 10.6250000 2.583333  9.689895
## 492 11.6666667 2.583333 10.696482
## 493 12.7083333 2.583333 11.709206
## 494 13.7500000 2.583333 12.726604
## 495 14.7916667 2.583333 13.747637
## 496 15.8333333 2.583333 14.771551
## 497 16.8750000 2.583333 15.797787
## 498 17.9166667 2.583333 16.825919
## 499 18.9583333 2.583333 17.855620
## 500 20.0000000 2.583333 18.886634
## 501 -5.0000000 2.666667  6.459744
## 502 -3.9583333 2.666667  5.536399
## 503 -2.9166667 2.666667  4.663184
## 504 -1.8750000 2.666667  3.874144
## 505 -0.8333333 2.666667  3.231538
## 506  0.2083333 2.666667  2.836693
## 507  1.2500000 2.666667  2.796596
## 508  2.2916667 2.666667  3.124934
## 509  3.3333333 2.666667  3.725535
## 510  4.3750000 2.666667  4.490452
## 511  5.4166667 2.666667  5.349657
## 512  6.4583333 2.666667  6.264475
## 513  7.5000000 2.666667  7.213779
## 514  8.5416667 2.666667  8.185579
## 515  9.5833333 2.666667  9.172728
## 516 10.6250000 2.666667 10.170758
## 517 11.6666667 2.666667 11.176754
## 518 12.7083333 2.666667 12.188744
## 519 13.7500000 2.666667 13.205350
## 520 14.7916667 2.666667 14.225583
## 521 15.8333333 2.666667 15.248714
## 522 16.8750000 2.666667 16.274197
## 523 17.9166667 2.666667 17.301613
## 524 18.9583333 2.666667 18.330638
## 525 20.0000000 2.666667 19.361016
## 526 -5.0000000 2.750000  6.122935
## 527 -3.9583333 2.750000  5.231503
## 528 -2.9166667 2.750000  4.406479
## 529 -1.8750000 2.750000  3.692645
## 530 -0.8333333 2.750000  3.166123
## 531  0.2083333 2.750000  2.929706
## 532  1.2500000 2.750000  3.051584
## 533  2.2916667 2.750000  3.494465
## 534  3.3333333 2.750000  4.156988
## 535  4.3750000 2.750000  4.951763
## 536  5.4166667 2.750000  5.824903
## 537  6.4583333 2.750000  6.746049
## 538  7.5000000 2.750000  7.697986
## 539  8.5416667 2.750000  8.670579
## 540  9.5833333 2.750000  9.657590
## 541 10.6250000 2.750000 10.655012
## 542 11.6666667 2.750000 11.660174
## 543 12.7083333 2.750000 12.671234
## 544 13.7500000 2.750000 13.686885
## 545 14.7916667 2.750000 14.706176
## 546 15.8333333 2.750000 15.728399
## 547 16.8750000 2.750000 16.753018
## 548 17.9166667 2.750000 17.779618
## 549 18.9583333 2.750000 18.807875
## 550 20.0000000 2.750000 19.837531
## 551 -5.0000000 2.833333  5.812668
## 552 -3.9583333 2.833333  4.961881
## 553 -2.9166667 2.833333  4.198041
## 554 -1.8750000 2.833333  3.577287
## 555 -0.8333333 2.833333  3.184423
## 556  0.2083333 2.833333  3.107130
## 557  1.2500000 2.833333  3.367210
## 558  2.2916667 2.833333  3.897703
## 559  3.3333333 2.833333  4.606106
## 560  4.3750000 2.833333  5.423142
## 561  5.4166667 2.833333  6.306733
## 562  6.4583333 2.833333  7.232525
## 563  7.5000000 2.833333  8.186214
## 564  8.5416667 2.833333  9.159089
## 565  9.5833333 2.833333 10.145633
## 566 10.6250000 2.833333 11.142216
## 567 11.6666667 2.833333 12.146366
## 568 12.7083333 2.833333 13.156351
## 569 13.7500000 2.833333 14.170924
## 570 14.7916667 2.833333 15.189165
## 571 15.8333333 2.833333 16.210383
## 572 16.8750000 2.833333 17.234049
## 573 17.9166667 2.833333 18.259752
## 574 18.9583333 2.833333 19.287165
## 575 20.0000000 2.833333 20.316030
## 576 -5.0000000 2.916667  5.533408
## 577 -3.9583333 2.916667  4.733562
## 578 -2.9166667 2.916667  4.045339
## 579 -1.8750000 2.916667  3.534552
## 580 -0.8333333 2.916667  3.285040
## 581  0.2083333 2.916667  3.355600
## 582  1.2500000 2.916667  3.728104
## 583  2.2916667 2.916667  4.325229
## 584  3.3333333 2.916667  5.068194
## 585  4.3750000 2.916667  5.902179
## 586  5.4166667 2.916667  6.793746
## 587  6.4583333 2.916667  7.722977
## 588  7.5000000 2.916667  8.677783
## 589  8.5416667 2.916667  9.650575
## 590  9.5833333 2.916667 10.636419
## 591 10.6250000 2.916667 11.631998
## 592 11.6666667 2.916667 12.635010
## 593 12.7083333 2.916667 13.643816
## 594 13.7500000 2.916667 14.657219
## 595 14.7916667 2.916667 15.674329
## 596 15.8333333 2.916667 16.694468
## 597 16.8750000 2.916667 17.717111
## 598 17.9166667 2.916667 18.741851
## 599 18.9583333 2.916667 19.768359
## 600 20.0000000 2.916667 20.796376
## 601 -5.0000000 3.000000  5.290068
## 602 -3.9583333 3.000000  4.552767
## 603 -2.9166667 3.000000  3.954833
## 604 -1.8750000 3.000000  3.567051
## 605 -0.8333333 3.000000  3.460801
## 606  0.2083333 3.000000  3.660679
## 607  1.2500000 3.000000  4.122395
## 608  2.2916667 3.000000  4.770519
## 609  3.3333333 3.000000  5.540009
## 610  4.3750000 3.000000  6.387150
## 611  5.4166667 3.000000  7.284903
## 612  6.4583333 3.000000  8.216694
## 613  7.5000000 3.000000  9.172157
## 614  8.5416667 3.000000 10.144605
## 615  9.5833333 3.000000 11.129586
## 616 10.6250000 3.000000 12.124047
## 617 11.6666667 3.000000 13.125832
## 618 12.7083333 3.000000 14.133385
## 619 13.7500000 3.000000 15.145554
## 620 14.7916667 3.000000 16.161472
## 621 15.8333333 3.000000 17.180474
## 622 16.8750000 3.000000 18.202042
## 623 17.9166667 3.000000 19.225767
## 624 18.9583333 3.000000 20.251322
## 625 20.0000000 3.000000 21.278443
grid %>%
  ggplot(aes(a1,a2))+
  geom_point(
    data = filter(grid, rank(dist)<=10),
    size = 4, color = "red"
  ) + 
  geom_point(aes(color = -dist))

ggplot(sim1, aes(x,y)) + 
  geom_point(size = 2, color = "grey30") + 
  geom_abline(
    aes(intercept = a1, slope = a2, color = -dist),
    data = filter(grid, rank(dist)<=10)
  )

best <- optim(c(0,0), measure_distance, data = sim1)
best$par
## [1] 4.222248 2.051204
ggplot(sim1, aes(x,y)) + 
  geom_point(size = 2, color = "grey30") + 
  geom_abline(intercept = best$par[1], slope = best$par[2])

sim1_mod <- lm(y~x, data = sim1)
coef(sim1_mod)
## (Intercept)           x 
##    4.220822    2.051533
grid <- sim1 %>%
  data_grid(x)
grid
## # A tibble: 10 x 1
##        x
##    <int>
##  1     1
##  2     2
##  3     3
##  4     4
##  5     5
##  6     6
##  7     7
##  8     8
##  9     9
## 10    10
sim1
## # A tibble: 30 x 2
##        x     y
##    <int> <dbl>
##  1     1  4.20
##  2     1  7.51
##  3     1  2.13
##  4     2  8.99
##  5     2 10.2 
##  6     2 11.3 
##  7     3  7.36
##  8     3 10.5 
##  9     3 10.5 
## 10     4 12.4 
## # … with 20 more rows
grid <- grid %>%
  add_predictions(sim1_mod)
grid
## # A tibble: 10 x 2
##        x  pred
##    <int> <dbl>
##  1     1  6.27
##  2     2  8.32
##  3     3 10.4 
##  4     4 12.4 
##  5     5 14.5 
##  6     6 16.5 
##  7     7 18.6 
##  8     8 20.6 
##  9     9 22.7 
## 10    10 24.7
ggplot(sim1, aes(x)) + 
  geom_point(aes(y = y)) + 
  geom_line(
    aes(y = pred),
    data = grid,
    color = "red",
    size = 1
  )

sim1 <- sim1 %>%
  add_residuals(sim1_mod)
sim1
## # A tibble: 30 x 3
##        x     y    resid
##    <int> <dbl>    <dbl>
##  1     1  4.20 -2.07   
##  2     1  7.51  1.24   
##  3     1  2.13 -4.15   
##  4     2  8.99  0.665  
##  5     2 10.2   1.92   
##  6     2 11.3   2.97   
##  7     3  7.36 -3.02   
##  8     3 10.5   0.130  
##  9     3 10.5   0.136  
## 10     4 12.4   0.00763
## # … with 20 more rows
ggplot(sim1, aes(resid)) + 
  geom_freqpoly(binwidth = 0.5)

ggplot(sim1, aes(x, resid)) + 
  geom_ref_line(h = 0) + 
  geom_point()

df <- tribble(
  ~y, ~x1, ~x2,
  4,2,5,
  5,1,6
)
model_matrix(df, y ~ x1)
## # A tibble: 2 x 2
##   `(Intercept)`    x1
##           <dbl> <dbl>
## 1             1     2
## 2             1     1
model_matrix(df, y ~ x1 - 1)
## # A tibble: 2 x 1
##      x1
##   <dbl>
## 1     2
## 2     1
model_matrix(df, y ~ x1 + x2)
## # A tibble: 2 x 3
##   `(Intercept)`    x1    x2
##           <dbl> <dbl> <dbl>
## 1             1     2     5
## 2             1     1     6
df <- tribble(
  ~sex, ~response,
  "male",1,
  "female",2,
  "male",1
)
model_matrix(df, response ~ sex)
## # A tibble: 3 x 2
##   `(Intercept)` sexmale
##           <dbl>   <dbl>
## 1             1       1
## 2             1       0
## 3             1       1
ggplot(sim2) + 
  geom_point(aes(x,y))

mod2 <- lm(y ~ x, data = sim2)
grid <- sim2 %>%
  data_grid(x) %>%
  add_predictions(mod2)
grid
## # A tibble: 4 x 2
##   x      pred
##   <chr> <dbl>
## 1 a      1.15
## 2 b      8.12
## 3 c      6.13
## 4 d      1.91
ggplot(sim2, aes(x)) + 
  geom_point(aes(y = y)) + 
  geom_point(
    data = grid,
    aes(y = pred),
    color = "red",
    size = 4
  )

tibble(x = "e") %>%
  add_predictions(mod2)
## Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels): factor x has new level e
ggplot(sim3, aes(x1,y)) + 
  geom_point(aes(color = x2))

mod1 <- lm(y ~ x1 + x2, data = sim3)
mod2 <- lm(y ~ x1 * x2, data = sim3)
grid <- sim3 %>%
  data_grid(x1,x2) %>%
  gather_predictions(mod1,mod2)
grid
## # A tibble: 80 x 4
##    model    x1 x2     pred
##    <chr> <int> <fct> <dbl>
##  1 mod1      1 a      1.67
##  2 mod1      1 b      4.56
##  3 mod1      1 c      6.48
##  4 mod1      1 d      4.03
##  5 mod1      2 a      1.48
##  6 mod1      2 b      4.37
##  7 mod1      2 c      6.28
##  8 mod1      2 d      3.84
##  9 mod1      3 a      1.28
## 10 mod1      3 b      4.17
## # … with 70 more rows
ggplot(sim3, aes(x1,y,color = x2)) + 
  geom_point() + 
  geom_line(data = grid, aes(y = pred)) + 
  facet_wrap(~model)

sim3 <- sim3 %>%
  gather_residuals(mod1, mod2)

ggplot(sim3, aes(x1, resid, color = x2)) + 
  geom_point() + 
  facet_grid(model ~ x2)

mod1 <- lm(y ~ x1 + x2, data = sim4)
mod2 <- lm(y ~ x1 * x2, data = sim4)

grid <- sim4 %>%
  data_grid(
    x1 = seq_range(x1,5),
    x2 = seq_range(x2,5)
  ) %>%
  gather_predictions(mod1, mod2)
grid
## # A tibble: 50 x 4
##    model    x1    x2   pred
##    <chr> <dbl> <dbl>  <dbl>
##  1 mod1   -1    -1    0.996
##  2 mod1   -1    -0.5 -0.395
##  3 mod1   -1     0   -1.79 
##  4 mod1   -1     0.5 -3.18 
##  5 mod1   -1     1   -4.57 
##  6 mod1   -0.5  -1    1.91 
##  7 mod1   -0.5  -0.5  0.516
##  8 mod1   -0.5   0   -0.875
##  9 mod1   -0.5   0.5 -2.27 
## 10 mod1   -0.5   1   -3.66 
## # … with 40 more rows
seq_range(c(0.0123,0.923423),n=5)
## [1] 0.0123000 0.2400808 0.4678615 0.6956423 0.9234230
seq_range(c(0.0123,0.923423),n=5,pretty=TRUE)
## [1] 0.0 0.2 0.4 0.6 0.8 1.0
x1 <- rcauchy(100)
seq_range(x1,n=5)
## [1] -34.812171 -21.321906  -7.831641   5.658625  19.148890
seq_range(x1,n=5,trim=0.1)
## [1] -4.3109721 -2.4428614 -0.5747507  1.2933601  3.1614708
seq_range(x1,n=5,trim=0.25)
## [1] -2.8922244 -1.9190161 -0.9458079  0.0274004  1.0006087
seq_range(x1,n=5,trim=0.5)
## [1] -1.71863268 -1.17662139 -0.63461010 -0.09259882  0.44941247
x2 <- c(0,1)
seq_range(x2, n = 5)
## [1] 0.00 0.25 0.50 0.75 1.00
seq_range(x2,n=5,expand=0.1)
## [1] -0.050  0.225  0.500  0.775  1.050
seq_range(x2,n=5,expand=0.25)
## [1] -0.1250  0.1875  0.5000  0.8125  1.1250
seq_range(x2,n=5,expand=0.5)
## [1] -0.250  0.125  0.500  0.875  1.250
ggplot(grid, aes(x1,x2)) + 
  geom_tile(aes(fill = pred)) + 
  facet_wrap(~model)

ggplot(grid, aes(x1,pred,color = x2,group = x2)) + 
  geom_line() + 
  facet_wrap(~model)

ggplot(grid, aes(x2, pred, color = x1, group = x1)) + 
  geom_line() + 
  facet_wrap(~model)

df <- tribble(
  ~y, ~x,
  1,1,
  2,2,
  3,3
)
model_matrix(df, y ~ x^2 + x)
## # A tibble: 3 x 2
##   `(Intercept)`     x
##           <dbl> <dbl>
## 1             1     1
## 2             1     2
## 3             1     3
model_matrix(df, y ~ I(x^2) + x)
## # A tibble: 3 x 3
##   `(Intercept)` `I(x^2)`     x
##           <dbl>    <dbl> <dbl>
## 1             1        1     1
## 2             1        4     2
## 3             1        9     3
model_matrix(df, y ~ poly(x,2))
## # A tibble: 3 x 3
##   `(Intercept)` `poly(x, 2)1` `poly(x, 2)2`
##           <dbl>         <dbl>         <dbl>
## 1             1     -7.07e- 1         0.408
## 2             1     -7.85e-17        -0.816
## 3             1      7.07e- 1         0.408
library(splines)
model_matrix(df, y ~ ns(x,2))
## # A tibble: 3 x 3
##   `(Intercept)` `ns(x, 2)1` `ns(x, 2)2`
##           <dbl>       <dbl>       <dbl>
## 1             1       0           0    
## 2             1       0.566      -0.211
## 3             1       0.344       0.771
sim5 <- tibble(
  x = seq(0, 3.5 * pi, length = 50),
  y = 4 * sin(x) + rnorm(length(x))
)
ggplot(sim5, aes(x,y)) + 
  geom_point()

mod1 <- lm(y ~ ns(x, 1), data = sim5)
mod2 <- lm(y ~ ns(x, 2), data = sim5)
mod3 <- lm(y ~ ns(x, 3), data = sim5)
mod4 <- lm(y ~ ns(x, 4), data = sim5)
mod5 <- lm(y ~ ns(x, 5), data = sim5)
grid <- sim5 %>%
  data_grid(x = seq_range(x,n=50,expand=0.1)) %>%
  gather_predictions(mod1,mod2,mod3,mod4,mod5,.pred="y")

ggplot(sim5, aes(x,y)) + 
  geom_point() + 
  geom_line(data = grid, color = "red") + 
  facet_wrap(~model)

df <- tribble(
  ~x, ~y,
  1,2.2,
  2,NA,
  3,3.5,
  4,8.3,
  NA,10
)
mod <- lm(y ~ x, data = df)
## Warning: Dropping 2 rows with missing values
mod <- lm(y ~ x, data = df, na.action = na.exclude)
nobs(mod)
## [1] 3
# generalized linear models 
# generalized addictive models 
# penalized linear models 
# robust linear models 
# trees 

19. Model building

library(tidyverse)
library(modelr)
options(na.action = na.warn)
library(nycflights13)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
ggplot(diamonds, aes(cut, price)) + geom_boxplot()

ggplot(diamonds, aes(color, price)) + geom_boxplot()

ggplot(diamonds, aes(clarity, price)) + geom_boxplot()

ggplot(diamonds, aes(carat, price)) + geom_hex(bins = 50)
## Warning: Removed 1 rows containing non-finite values (stat_binhex).

diamonds2 <- diamonds %>%
  filter(carat <= 2.5) %>%
  mutate(lprice = log2(price), lcarat = log2(carat))
ggplot(diamonds2, aes(lcarat, lprice)) + geom_hex(bins = 50)

mod_diamond <- lm(lprice ~ lcarat, data = diamonds2)
grid <- diamonds2 %>%
  data_grid(carat = seq_range(carat,20)) %>%
  mutate(lcarat = log2(carat)) %>%
  add_predictions(mod_diamond,"lprice") %>%
  mutate(price = 2 ^ lprice)

ggplot(diamonds2, aes(carat, price)) + 
  geom_hex(bins = 50) + 
  geom_line(data = grid, color = "red", size = 1)

diamonds2 <- diamonds2 %>%
  add_residuals(mod_diamond, "lresid")
ggplot(diamonds2, aes(lcarat, lresid)) + 
  geom_hex(bins = 50)

ggplot(diamonds2, aes(cut, lresid)) + geom_boxplot()

ggplot(diamonds2, aes(color, lresid)) + geom_boxplot()

ggplot(diamonds2, aes(clarity, lresid)) + geom_boxplot()

mod_diamond2 <- lm(
  lprice ~ lcarat + color + cut + clarity,
  data = diamonds2
)
grid <- diamonds2 %>%
  data_grid(cut, .model = mod_diamond2) %>%
  add_predictions(mod_diamond2)
grid
## # A tibble: 5 x 5
##   cut       lcarat color clarity  pred
##   <ord>      <dbl> <chr> <chr>   <dbl>
## 1 Fair      -0.515 G     VS2      11.2
## 2 Good      -0.515 G     VS2      11.3
## 3 Very Good -0.515 G     VS2      11.4
## 4 Premium   -0.515 G     VS2      11.4
## 5 Ideal     -0.515 G     VS2      11.4
ggplot(grid, aes(cut, pred)) + 
  geom_point()

diamonds2 <- diamonds2 %>%
  add_residuals(mod_diamond2,"lresid2")
ggplot(diamonds2, aes(lcarat, lresid2)) + 
  geom_hex(bins = 50)

diamonds2 %>%
  filter(abs(lresid2) > 1) %>%
  add_predictions(mod_diamond2) %>%
  mutate(pred = round(2^pred)) %>%
  select(price, pred, carat:table, x:z) %>%
  arrange(price)
## # A tibble: 16 x 11
##    price  pred carat cut       color clarity depth table     x     y     z
##    <int> <dbl> <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  1013   264 0.25  Fair      F     SI2      54.4    64  4.3   4.23  2.32
##  2  1186   284 0.25  Premium   G     SI2      59      60  5.33  5.28  3.12
##  3  1186   284 0.25  Premium   G     SI2      58.8    60  5.33  5.28  3.12
##  4  1262  2644 1.03  Fair      E     I1       78.2    54  5.72  5.59  4.42
##  5  1415   639 0.35  Fair      G     VS2      65.9    54  5.57  5.53  3.66
##  6  1415   639 0.35  Fair      G     VS2      65.9    54  5.57  5.53  3.66
##  7  1715   576 0.32  Fair      F     VS2      59.6    60  4.42  4.34  2.61
##  8  1776   412 0.290 Fair      F     SI1      55.8    60  4.48  4.41  2.48
##  9  2160   314 0.34  Fair      F     I1       55.8    62  4.72  4.6   2.6 
## 10  2366   775 0.3   Very Good D     VVS2     60.6    58  4.33  4.35  2.63
## 11  3360  1373 0.51  Premium   F     SI1      62.7    62  5.09  4.96  3.15
## 12  3807  1540 0.61  Good      F     SI2      62.5    65  5.36  5.29  3.33
## 13  3920  1705 0.51  Fair      F     VVS2     65.4    60  4.98  4.9   3.23
## 14  4368  1705 0.51  Fair      F     VVS2     60.7    66  5.21  5.11  3.13
## 15 10011  4048 1.01  Fair      D     SI2      64.6    58  6.25  6.2   4.02
## 16 10470 23622 2.46  Premium   E     SI2      59.7    59  8.82  8.76  5.25
daily <- flights %>%
  mutate(date = make_date(year, month, day)) %>%
  group_by(date) %>%
  summarize(n = n())
daily
## # A tibble: 365 x 2
##    date           n
##    <date>     <int>
##  1 2013-01-01   842
##  2 2013-01-02   943
##  3 2013-01-03   914
##  4 2013-01-04   915
##  5 2013-01-05   720
##  6 2013-01-06   832
##  7 2013-01-07   933
##  8 2013-01-08   899
##  9 2013-01-09   902
## 10 2013-01-10   932
## # … with 355 more rows
ggplot(daily, aes(date, n)) + geom_line()

daily <- daily %>%
  mutate(wday = wday(date, label = TRUE))
ggplot(daily, aes(wday, n)) + 
  geom_boxplot()

mod <- lm(n ~ wday, data = daily)

grid <- daily %>% 
  data_grid(wday) %>%
  add_predictions(mod, "n")

ggplot(daily, aes(wday, n)) + 
  geom_boxplot() + 
  geom_point(data = grid, color = "red", size = 4)

daily <- daily %>%
  add_residuals(mod)
daily %>%
  ggplot(aes(date, resid)) + 
  geom_ref_line(h = 0) + 
  geom_line()

ggplot(daily, aes(date, resid, color = wday)) + 
  geom_ref_line(h = 0) + 
  geom_line()

daily %>%
  filter(resid < -100)
## # A tibble: 11 x 4
##    date           n wday  resid
##    <date>     <int> <ord> <dbl>
##  1 2013-01-01   842 Tue   -109.
##  2 2013-01-20   786 Sun   -105.
##  3 2013-05-26   729 Sun   -162.
##  4 2013-07-04   737 Thu   -229.
##  5 2013-07-05   822 Fri   -145.
##  6 2013-09-01   718 Sun   -173.
##  7 2013-11-28   634 Thu   -332.
##  8 2013-11-29   661 Fri   -306.
##  9 2013-12-24   761 Tue   -190.
## 10 2013-12-25   719 Wed   -244.
## 11 2013-12-31   776 Tue   -175.
daily %>%
  ggplot(aes(date, resid)) + 
  geom_ref_line(h = 0) + 
  geom_line(color = "grey50") + 
  geom_smooth(se = FALSE, span = 0.2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

daily %>%
  filter(wday == "Sat") %>%
  ggplot(aes(date, n)) + 
  geom_point() + 
  geom_line() + 
  scale_x_date(
    NULL,
    date_breaks = "1 month",
    date_labels = "%b"
  )

term <- function(date){
  cut(date,
      breaks = ymd(20130101, 20130605, 20130825, 20140101),
      labels = c("spring","summer","fall"))
}

daily <- daily %>%
  mutate(term = term(date))

daily %>%
  filter(wday == "Sat") %>%
  ggplot(aes(date, n, color = term)) + 
  geom_point(alpha = 1/3) + 
  geom_line() + 
  scale_x_date(
    NULL,
    date_breaks = "1 month",
    date_labels = "%b"
  )

daily %>%
  ggplot(aes(wday, n, color = term)) + 
  geom_boxplot()

mod1 <- lm(n ~ wday, data = daily)
mod2 <- lm(n ~ wday * term, data = daily)

daily %>%
  gather_residuals(without_term = mod1, with_term = mod2) %>%
  ggplot(aes(date, resid, color = model)) + 
  geom_line(alpha = 0.75)

grid <- daily %>%
  data_grid(wday, term) %>%
  add_predictions(mod2, "n")

ggplot(daily, aes(wday, n)) + 
  geom_boxplot() + 
  geom_point(data = grid, color = "red") + 
  facet_wrap(~term)

mod3 <- MASS::rlm(n ~ wday * term, data = daily)

daily %>%
  add_residuals(mod3, "resid") %>%
  ggplot(aes(date, resid)) + 
  geom_hline(yintercept = 0, size = 2, color = "white") + 
  geom_line()

compute_vars <- function(data){
  data %>%
    mutate(
      term = term(date),
      wday = wday(date, label = TRUE)
    )
}
wday2 <- function(x) wday(x, label = TRUE)
mod3 <- lm(n ~ wday2(date) * term(date), data = daily)
library(splines)
mod <- MASS::rlm(n ~ wday * ns(date,5), data = daily)

daily %>%
  data_grid(wday, date = seq_range(date, n = 13)) %>%
  add_predictions(mod) %>%
  ggplot(aes(date, pred, color = wday)) + 
  geom_line() + 
  geom_point()

20. Many models with purrr and broom

library(modelr)
library(tidyverse)
library(gapminder)
gapminder
## # A tibble: 1,704 x 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
## # … with 1,694 more rows
gapminder %>%
  ggplot(aes(year, lifeExp, group = country)) + 
  geom_line(alpha = 1/3)

nz <- filter(gapminder, country == "New Zealand")
nz %>%
  ggplot(aes(year, lifeExp)) + 
  geom_line() + 
  ggtitle("Full data = ")

nz_mod <- lm(lifeExp ~ year, data = nz)
nz %>%
  add_predictions(nz_mod) %>%
  ggplot(aes(year, pred)) + 
  geom_line() + 
  ggtitle("Linear trend + ")

nz %>%
  add_residuals(nz_mod) %>%
  ggplot(aes(year, resid)) + 
  geom_hline(yintercept = 0, color = "white", size = 3) + 
  geom_line() + 
  ggtitle("Remaining pattern")

by_country <- gapminder %>%
  group_by(country, continent) %>%
  nest()

by_country
## # A tibble: 142 x 3
##    country     continent data             
##    <fct>       <fct>     <list>           
##  1 Afghanistan Asia      <tibble [12 × 4]>
##  2 Albania     Europe    <tibble [12 × 4]>
##  3 Algeria     Africa    <tibble [12 × 4]>
##  4 Angola      Africa    <tibble [12 × 4]>
##  5 Argentina   Americas  <tibble [12 × 4]>
##  6 Australia   Oceania   <tibble [12 × 4]>
##  7 Austria     Europe    <tibble [12 × 4]>
##  8 Bahrain     Asia      <tibble [12 × 4]>
##  9 Bangladesh  Asia      <tibble [12 × 4]>
## 10 Belgium     Europe    <tibble [12 × 4]>
## # … with 132 more rows
by_country$data[[1]]
## # A tibble: 12 x 4
##     year lifeExp      pop gdpPercap
##    <int>   <dbl>    <int>     <dbl>
##  1  1952    28.8  8425333      779.
##  2  1957    30.3  9240934      821.
##  3  1962    32.0 10267083      853.
##  4  1967    34.0 11537966      836.
##  5  1972    36.1 13079460      740.
##  6  1977    38.4 14880372      786.
##  7  1982    39.9 12881816      978.
##  8  1987    40.8 13867957      852.
##  9  1992    41.7 16317921      649.
## 10  1997    41.8 22227415      635.
## 11  2002    42.1 25268405      727.
## 12  2007    43.8 31889923      975.
country_model <- function(df){
  lm(lifeExp ~ year, data = df)
}
models <- map(by_country$data, country_model)
by_country <- by_country %>%
  mutate(model = map(data, country_model))
by_country
## # A tibble: 142 x 4
##    country     continent data              model   
##    <fct>       <fct>     <list>            <list>  
##  1 Afghanistan Asia      <tibble [12 × 4]> <S3: lm>
##  2 Albania     Europe    <tibble [12 × 4]> <S3: lm>
##  3 Algeria     Africa    <tibble [12 × 4]> <S3: lm>
##  4 Angola      Africa    <tibble [12 × 4]> <S3: lm>
##  5 Argentina   Americas  <tibble [12 × 4]> <S3: lm>
##  6 Australia   Oceania   <tibble [12 × 4]> <S3: lm>
##  7 Austria     Europe    <tibble [12 × 4]> <S3: lm>
##  8 Bahrain     Asia      <tibble [12 × 4]> <S3: lm>
##  9 Bangladesh  Asia      <tibble [12 × 4]> <S3: lm>
## 10 Belgium     Europe    <tibble [12 × 4]> <S3: lm>
## # … with 132 more rows
by_country %>%
  filter(continent == "Europe")
## # A tibble: 30 x 4
##    country                continent data              model   
##    <fct>                  <fct>     <list>            <list>  
##  1 Albania                Europe    <tibble [12 × 4]> <S3: lm>
##  2 Austria                Europe    <tibble [12 × 4]> <S3: lm>
##  3 Belgium                Europe    <tibble [12 × 4]> <S3: lm>
##  4 Bosnia and Herzegovina Europe    <tibble [12 × 4]> <S3: lm>
##  5 Bulgaria               Europe    <tibble [12 × 4]> <S3: lm>
##  6 Croatia                Europe    <tibble [12 × 4]> <S3: lm>
##  7 Czech Republic         Europe    <tibble [12 × 4]> <S3: lm>
##  8 Denmark                Europe    <tibble [12 × 4]> <S3: lm>
##  9 Finland                Europe    <tibble [12 × 4]> <S3: lm>
## 10 France                 Europe    <tibble [12 × 4]> <S3: lm>
## # … with 20 more rows
by_country %>%
  arrange(continent, country)
## # A tibble: 142 x 4
##    country                  continent data              model   
##    <fct>                    <fct>     <list>            <list>  
##  1 Algeria                  Africa    <tibble [12 × 4]> <S3: lm>
##  2 Angola                   Africa    <tibble [12 × 4]> <S3: lm>
##  3 Benin                    Africa    <tibble [12 × 4]> <S3: lm>
##  4 Botswana                 Africa    <tibble [12 × 4]> <S3: lm>
##  5 Burkina Faso             Africa    <tibble [12 × 4]> <S3: lm>
##  6 Burundi                  Africa    <tibble [12 × 4]> <S3: lm>
##  7 Cameroon                 Africa    <tibble [12 × 4]> <S3: lm>
##  8 Central African Republic Africa    <tibble [12 × 4]> <S3: lm>
##  9 Chad                     Africa    <tibble [12 × 4]> <S3: lm>
## 10 Comoros                  Africa    <tibble [12 × 4]> <S3: lm>
## # … with 132 more rows
by_country <- by_country %>%
  mutate(
    resids = map2(data, model, add_residuals)
  )
by_country
## # A tibble: 142 x 5
##    country     continent data              model    resids           
##    <fct>       <fct>     <list>            <list>   <list>           
##  1 Afghanistan Asia      <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  2 Albania     Europe    <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  3 Algeria     Africa    <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  4 Angola      Africa    <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  5 Argentina   Americas  <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  6 Australia   Oceania   <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  7 Austria     Europe    <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  8 Bahrain     Asia      <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
##  9 Bangladesh  Asia      <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## 10 Belgium     Europe    <tibble [12 × 4]> <S3: lm> <tibble [12 × 5]>
## # … with 132 more rows
by_country$resids[[1]]
## # A tibble: 12 x 5
##     year lifeExp      pop gdpPercap   resid
##    <int>   <dbl>    <int>     <dbl>   <dbl>
##  1  1952    28.8  8425333      779. -1.11  
##  2  1957    30.3  9240934      821. -0.952 
##  3  1962    32.0 10267083      853. -0.664 
##  4  1967    34.0 11537966      836. -0.0172
##  5  1972    36.1 13079460      740.  0.674 
##  6  1977    38.4 14880372      786.  1.65  
##  7  1982    39.9 12881816      978.  1.69  
##  8  1987    40.8 13867957      852.  1.28  
##  9  1992    41.7 16317921      649.  0.754 
## 10  1997    41.8 22227415      635. -0.534 
## 11  2002    42.1 25268405      727. -1.54  
## 12  2007    43.8 31889923      975. -1.22
resids <- unnest(by_country, resids)
resids
## # A tibble: 1,704 x 7
##    country     continent  year lifeExp      pop gdpPercap   resid
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>   <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779. -1.11  
##  2 Afghanistan Asia       1957    30.3  9240934      821. -0.952 
##  3 Afghanistan Asia       1962    32.0 10267083      853. -0.664 
##  4 Afghanistan Asia       1967    34.0 11537966      836. -0.0172
##  5 Afghanistan Asia       1972    36.1 13079460      740.  0.674 
##  6 Afghanistan Asia       1977    38.4 14880372      786.  1.65  
##  7 Afghanistan Asia       1982    39.9 12881816      978.  1.69  
##  8 Afghanistan Asia       1987    40.8 13867957      852.  1.28  
##  9 Afghanistan Asia       1992    41.7 16317921      649.  0.754 
## 10 Afghanistan Asia       1997    41.8 22227415      635. -0.534 
## # … with 1,694 more rows
resids %>%
  ggplot(aes(year, resid)) + 
  geom_line(aes(group = country), alpha = 1/3) + 
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

resids %>%
  ggplot(aes(year, resid, group = country)) + 
  geom_line(alpha = 1/3) + 
  facet_wrap(~continent)

broom::glance(nz_mod)
## # A tibble: 1 x 11
##   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
##       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
## 1     0.954         0.949 0.804      205. 5.41e-8     2  -13.3  32.6  34.1
## # … with 2 more variables: deviance <dbl>, df.residual <int>
by_country %>%
  mutate(glance = map(model, broom::glance)) %>%
  unnest(glance)
## # A tibble: 142 x 16
##    country continent data  model resids r.squared adj.r.squared sigma
##    <fct>   <fct>     <lis> <lis> <list>     <dbl>         <dbl> <dbl>
##  1 Afghan… Asia      <tib… <S3:… <tibb…     0.948         0.942 1.22 
##  2 Albania Europe    <tib… <S3:… <tibb…     0.911         0.902 1.98 
##  3 Algeria Africa    <tib… <S3:… <tibb…     0.985         0.984 1.32 
##  4 Angola  Africa    <tib… <S3:… <tibb…     0.888         0.877 1.41 
##  5 Argent… Americas  <tib… <S3:… <tibb…     0.996         0.995 0.292
##  6 Austra… Oceania   <tib… <S3:… <tibb…     0.980         0.978 0.621
##  7 Austria Europe    <tib… <S3:… <tibb…     0.992         0.991 0.407
##  8 Bahrain Asia      <tib… <S3:… <tibb…     0.967         0.963 1.64 
##  9 Bangla… Asia      <tib… <S3:… <tibb…     0.989         0.988 0.977
## 10 Belgium Europe    <tib… <S3:… <tibb…     0.995         0.994 0.293
## # … with 132 more rows, and 8 more variables: statistic <dbl>,
## #   p.value <dbl>, df <int>, logLik <dbl>, AIC <dbl>, BIC <dbl>,
## #   deviance <dbl>, df.residual <int>
glance <- by_country %>%
  mutate(glance = map(model, broom::glance)) %>%
  unnest(glance, .drop = TRUE)
glance
## # A tibble: 142 x 13
##    country continent r.squared adj.r.squared sigma statistic  p.value    df
##    <fct>   <fct>         <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>
##  1 Afghan… Asia          0.948         0.942 1.22      181.  9.84e- 8     2
##  2 Albania Europe        0.911         0.902 1.98      102.  1.46e- 6     2
##  3 Algeria Africa        0.985         0.984 1.32      662.  1.81e-10     2
##  4 Angola  Africa        0.888         0.877 1.41       79.1 4.59e- 6     2
##  5 Argent… Americas      0.996         0.995 0.292    2246.  4.22e-13     2
##  6 Austra… Oceania       0.980         0.978 0.621     481.  8.67e-10     2
##  7 Austria Europe        0.992         0.991 0.407    1261.  7.44e-12     2
##  8 Bahrain Asia          0.967         0.963 1.64      291.  1.02e- 8     2
##  9 Bangla… Asia          0.989         0.988 0.977     930.  3.37e-11     2
## 10 Belgium Europe        0.995         0.994 0.293    1822.  1.20e-12     2
## # … with 132 more rows, and 5 more variables: logLik <dbl>, AIC <dbl>,
## #   BIC <dbl>, deviance <dbl>, df.residual <int>
glance %>%
  arrange(r.squared)
## # A tibble: 142 x 13
##    country continent r.squared adj.r.squared sigma statistic p.value    df
##    <fct>   <fct>         <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>
##  1 Rwanda  Africa       0.0172      -0.0811   6.56     0.175  0.685      2
##  2 Botswa… Africa       0.0340      -0.0626   6.11     0.352  0.566      2
##  3 Zimbab… Africa       0.0562      -0.0381   7.21     0.596  0.458      2
##  4 Zambia  Africa       0.0598      -0.0342   4.53     0.636  0.444      2
##  5 Swazil… Africa       0.0682      -0.0250   6.64     0.732  0.412      2
##  6 Lesotho Africa       0.0849      -0.00666  5.93     0.927  0.358      2
##  7 Cote d… Africa       0.283        0.212    3.93     3.95   0.0748     2
##  8 South … Africa       0.312        0.244    4.74     4.54   0.0588     2
##  9 Uganda  Africa       0.342        0.276    3.19     5.20   0.0457     2
## 10 Congo,… Africa       0.348        0.283    2.43     5.34   0.0434     2
## # … with 132 more rows, and 5 more variables: logLik <dbl>, AIC <dbl>,
## #   BIC <dbl>, deviance <dbl>, df.residual <int>
glance %>%
  ggplot(aes(continent, r.squared)) + 
  geom_jitter(width = 0.5)

bad_fit <- filter(glance, r.squared < 0.25)

gapminder %>%
  semi_join(bad_fit, by = "country") %>%
  ggplot(aes(year, lifeExp, color = country)) + 
  geom_line()

data.frame(x = list(1:3, 3:5))
##   x.1.3 x.3.5
## 1     1     3
## 2     2     4
## 3     3     5
data.frame(
  x = I(list(1:3, 3:5)),
  y = c("1,2","3,4,5")
)
##         x     y
## 1 1, 2, 3   1,2
## 2 3, 4, 5 3,4,5
tibble(
  x = list(1:3, 3:5),
  y = c("1,2", "3,4,5")
)
## # A tibble: 2 x 2
##   x         y    
##   <list>    <chr>
## 1 <int [3]> 1,2  
## 2 <int [3]> 3,4,5
tribble(
  ~x, ~y,
  1:3, "1,2",
  3:5, "3,4,5"
)
## # A tibble: 2 x 2
##   x         y    
##   <list>    <chr>
## 1 <int [3]> 1,2  
## 2 <int [3]> 3,4,5
gapminder %>%
  group_by(country, continent) %>%
  nest()
## # A tibble: 142 x 3
##    country     continent data             
##    <fct>       <fct>     <list>           
##  1 Afghanistan Asia      <tibble [12 × 4]>
##  2 Albania     Europe    <tibble [12 × 4]>
##  3 Algeria     Africa    <tibble [12 × 4]>
##  4 Angola      Africa    <tibble [12 × 4]>
##  5 Argentina   Americas  <tibble [12 × 4]>
##  6 Australia   Oceania   <tibble [12 × 4]>
##  7 Austria     Europe    <tibble [12 × 4]>
##  8 Bahrain     Asia      <tibble [12 × 4]>
##  9 Bangladesh  Asia      <tibble [12 × 4]>
## 10 Belgium     Europe    <tibble [12 × 4]>
## # … with 132 more rows
gapminder %>%
  nest(year:gdpPercap)
## # A tibble: 142 x 3
##    country     continent data             
##    <fct>       <fct>     <list>           
##  1 Afghanistan Asia      <tibble [12 × 4]>
##  2 Albania     Europe    <tibble [12 × 4]>
##  3 Algeria     Africa    <tibble [12 × 4]>
##  4 Angola      Africa    <tibble [12 × 4]>
##  5 Argentina   Americas  <tibble [12 × 4]>
##  6 Australia   Oceania   <tibble [12 × 4]>
##  7 Austria     Europe    <tibble [12 × 4]>
##  8 Bahrain     Asia      <tibble [12 × 4]>
##  9 Bangladesh  Asia      <tibble [12 × 4]>
## 10 Belgium     Europe    <tibble [12 × 4]>
## # … with 132 more rows
df <- tribble(
  ~x1,
  "a,b,c",
  "d,e,f,g"
)
df
## # A tibble: 2 x 1
##   x1     
##   <chr>  
## 1 a,b,c  
## 2 d,e,f,g
df %>%
  mutate(x2 = stringr::str_split(x1, ","))
## # A tibble: 2 x 2
##   x1      x2       
##   <chr>   <list>   
## 1 a,b,c   <chr [3]>
## 2 d,e,f,g <chr [4]>
df %>%
  mutate(x2 = stringr::str_split(x1, ",")) %>%
  unnest()
## # A tibble: 7 x 2
##   x1      x2   
##   <chr>   <chr>
## 1 a,b,c   a    
## 2 a,b,c   b    
## 3 a,b,c   c    
## 4 d,e,f,g d    
## 5 d,e,f,g e    
## 6 d,e,f,g f    
## 7 d,e,f,g g
sim <- tribble(
  ~f, ~params,
  "runif",list(min = -1, max = -1),
  "rnorm",list(sd = 5),
  "rpois",list(lambda = 10)
)

sim %>%
  mutate(sims = invoke_map(f, params, n = 10))
## # A tibble: 3 x 3
##   f     params     sims      
##   <chr> <list>     <list>    
## 1 runif <list [2]> <dbl [10]>
## 2 rnorm <list [1]> <dbl [10]>
## 3 rpois <list [1]> <int [10]>
mtcars %>%
  group_by(cyl) %>%
  summarize(q = quantile(mpg))
## Error: Column `q` must be length 1 (a summary value), not 5
mtcars %>%
  group_by(cyl) %>% 
  summarize(q = list(quantile(mpg)))
## # A tibble: 3 x 2
##     cyl q        
##   <dbl> <list>   
## 1    16 <dbl [5]>
## 2    24 <dbl [5]>
## 3    32 <dbl [5]>
probs <- c(0.01, 0.25, 0.5, 0.75, 0.99)
mtcars %>%
  group_by(cyl) %>%
  summarize(p = list(probs), q = list(quantile(mpg, probs))) %>%
  unnest()
## # A tibble: 15 x 3
##      cyl     p     q
##    <dbl> <dbl> <dbl>
##  1    16  0.01  21.4
##  2    16  0.25  22.8
##  3    16  0.5   26  
##  4    16  0.75  30.4
##  5    16  0.99  33.8
##  6    24  0.01  17.8
##  7    24  0.25  18.6
##  8    24  0.5   19.7
##  9    24  0.75  21  
## 10    24  0.99  21.4
## 11    32  0.01  10.4
## 12    32  0.25  14.4
## 13    32  0.5   15.2
## 14    32  0.75  16.2
## 15    32  0.99  19.1
x <- list(
  a = 1:5,
  b = 3:4,
  c = 5:6
)
df <- enframe(x)
df
## # A tibble: 3 x 2
##   name  value    
##   <chr> <list>   
## 1 a     <int [5]>
## 2 b     <int [2]>
## 3 c     <int [2]>
df %>%
  mutate(
    smry = map2_chr(
      name,
      value, ~ stringr::str_c(.x, ":", .y[1])
    )
  )
## # A tibble: 3 x 3
##   name  value     smry 
##   <chr> <list>    <chr>
## 1 a     <int [5]> a:1  
## 2 b     <int [2]> b:3  
## 3 c     <int [2]> c:5
df <- tribble(
  ~x,
  letters[1:5],
  1:3,
  runif(5)
)
df
## # A tibble: 3 x 1
##   x        
##   <list>   
## 1 <chr [5]>
## 2 <int [3]>
## 3 <dbl [5]>
df %>%
  mutate(
    type = map_chr(x, typeof),
    length = map_int(x, length)
  )
## # A tibble: 3 x 3
##   x         type      length
##   <list>    <chr>      <int>
## 1 <chr [5]> character      5
## 2 <int [3]> integer        3
## 3 <dbl [5]> double         5
df <- tribble(
  ~x,
  list(a = 1, b = 2),
  list(a = 2, c = 4)
)
df
## # A tibble: 2 x 1
##   x         
##   <list>    
## 1 <list [2]>
## 2 <list [2]>
df %>%
  mutate(
    a = map_dbl(x, "a"),
    b = map_dbl(x, "b", .null = NA_real_)
  )
## # A tibble: 2 x 3
##   x              a     b
##   <list>     <dbl> <dbl>
## 1 <list [2]>     1     2
## 2 <list [2]>     2    NA
tibble(x = 1:2, y = list(1:4, 1)) %>% unnest(y)
## # A tibble: 5 x 2
##       x     y
##   <int> <dbl>
## 1     1     1
## 2     1     2
## 3     1     3
## 4     1     4
## 5     2     1
df1 <- tribble(
  ~x, ~y, ~z,
  1, c("a","b"), 1:2,
  2, "c", 3
)
df1
## # A tibble: 2 x 3
##       x y         z        
##   <dbl> <list>    <list>   
## 1     1 <chr [2]> <int [2]>
## 2     2 <chr [1]> <dbl [1]>
df1 %>% unnest(y,z)
## # A tibble: 3 x 3
##       x y         z
##   <dbl> <chr> <dbl>
## 1     1 a         1
## 2     1 b         2
## 3     2 c         3
df2 <- tribble(
  ~x, ~y, ~z,
  1, "a", 1:2,
  2, c("b","c"),3
)
df2
## # A tibble: 2 x 3
##       x y         z        
##   <dbl> <list>    <list>   
## 1     1 <chr [1]> <int [2]>
## 2     2 <chr [2]> <dbl [1]>
df2 %>% unnest(y,z)
## All nested columns must have the same number of elements.

Part 5. Communicate

##21. R markdown

eval = FALSE,
include = FALSE,
echo = FALSE,
message = FALSE,
warning = FALSE,
results = 'hide',
fig.show = 'hide'
error = TRUE
}
mtcars[1:5, 1:10]
##                    mpg cyl disp  hp drat    wt  qsec vs am gear
## Mazda RX4         21.0  24  160 110 3.90 2.620 16.46  0  1    4
## Mazda RX4 Wag     21.0  24  160 110 3.90 2.875 17.02  0  1    4
## Datsun 710        22.8  16  108  93 3.85 2.320 18.61  1  1    4
## Hornet 4 Drive    21.4  24  258 110 3.08 3.215 19.44  1  0    3
## Hornet Sportabout 18.7  32  360 175 3.15 3.440 17.02  0  0    3
knitr::kable(
  mtcars[1:5,],
  caption = "A knitr kable."
)
A knitr kable.
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 24 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 24 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 16 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 24 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 32 360 175 3.15 3.440 17.02 0 0 3 2
knitr::clean_cache()
## NULL
# knitr::opts_chunk$set(
#   comment = "#>",
#   collapse = TRUE)

# knitr::opts_chunk$set(
# echo = FALSE
# )
# we have about `r nrow(diaamonds)` diamonds. Only `r nrow(diamonds) - nrow(smaller)` are larger than 2.5 carats. The distribution of the remainder is shown below:
comma <- function(x) format(x, digits = 2, big.mark = ",")
comma(3452345)
## [1] "3,452,345"
comma(.12358124331)
## [1] "0.12"
# --- 
# output: html_document
# params: my_class:"suv"
---
# ```{r setup, include = FALSE}
# library(ggplot2)
# library(dplyr)
# class <- mpg %>% filter(class == params$my_class)
# ```
# # Fuel economy for `r params$my_class`s
# ```{r, message = FALSE}
# ggplot(class, aes(displ, hwy)) +
# geom_point() +
# geom_smooth(se = FALSE)
# ```
## Error: <text>:16:0: unexpected end of input
## 14: # geom_smooth(se = FALSE)
## 15: # ```
##    ^
# params:
# start: !r lubridate::ymd("2015-01-01")
# snapshot: !r lubridate::ymd_hms("2015-01-01 12:30:00")
# rmarkdown::render(
#   "fuel-economy.Rmd",
#   params = list(my_class = "suv")
# )
# reports <- tibble(
#  class = unique(mpg$class),
#  filename = stringr::str_c("fuel-economy-",class,".html"),
#  params = purrr:map(class, ~list(my_class = .))
#)
#reports
#reports %>%
#  select(output_file = filename, params) %>%
#  purrr::pwalk(rmarkdown::render, input = "fuel-economy.Rmd")

22. Graphics for communication with ggplot2

library(tidyverse)
ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_smooth(se = TRUE) + 
  labs(
    title = paste(
      "Fuel efficiency generally decreases with engine size"
    )
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = class)) +
geom_smooth(se = FALSE) +
labs(
title = paste(
"Fuel efficiency generally decreases with", "engine size"),
subtitle = paste(
"Two seaters (sports cars) are an exception", "because of their light weight"),
caption = "Data from fueleconomy.gov"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_smooth(se = FALSE) + 
  labs(
    x = "Engine displacement (L)",
    y = "Highway fuel economy (mpg)",
    color = "Car type"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

df <- tibble(
x = runif(10),
y = runif(10)
)
ggplot(df, aes(x, y)) +
geom_point() +
labs(
x = quote(sum(x[i] ^ 2, i == 1, n)),
y = quote(alpha + beta + frac(delta, theta))
)

best_in_class <- mpg %>%
  group_by(class) %>%
  filter(row_number(desc(hwy)) == 1)

ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_text(aes(label = model), data = best_in_class)

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_label(
    aes(label = model),
    data = best_in_class,
    nudge_y = 2,
    alpha = 0.5
)

ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_point(size = 3, shape = 1, data = best_in_class) + 
  ggrepel::geom_label_repel(
    aes(label = model),
    data = best_in_class
  )

class_avg <- mpg %>%
group_by(class) %>%
summarize(
displ = median(displ),
hwy = median(hwy)
)

ggplot(mpg, aes(displ, hwy, color = class)) +
ggrepel::geom_label_repel(aes(label = class),
data = class_avg,
size = 6,
label.size = 0,
segment.color = NA
) +
geom_point() +
theme(legend.position = "none")

label <- mpg %>%
summarize(
displ = max(displ),
hwy = max(hwy),
label = paste(
"Increasing engine size is \nrelated to decreasing fuel economy."
)
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(
aes(label = label),
data = label,
vjust = "top",
hjust = "right"
)

label <- tibble(
displ = Inf,
hwy = Inf,
label = paste(
"Increasing engine size is \nrelated to decreasing fuel economy."
)
)
ggplot(mpg, aes(displ, hwy)) +
geom_point() +
geom_text(
aes(label = label),
data = label,
vjust = "top",
hjust = "right"
)

"Increasing engine size related to decreasing fuel economy." %>%
stringr::str_wrap(width = 40) %>%
writeLines()
## Increasing engine size related to
## decreasing fuel economy.
# geom_hline(), geom_vline(), geom_rect(), geom_segment() 
ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class))

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  scale_x_continuous() +
  scale_y_continuous() +
  scale_color_discrete()

ggplot(mpg, aes(displ, hwy)) + 
  geom_point() + 
  scale_y_continuous(breaks = seq(15,40,by = 5))

ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  scale_x_continuous(labels = NULL) +
  scale_y_continuous(labels = NULL)

presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(aes(start, id)) +
    geom_point() +
    geom_segment(aes(xend = end, yend = id)) +
    scale_x_date(
      NULL,
      breaks = presidential$start,
      date_labels = "'%y"
)

base <- ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class))

base + theme(legend.position = "left")

base + theme(legend.position = "top")

base + theme(legend.position = "bottom")

base + theme(legend.position = "right")

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(se = FALSE) +
  theme(legend.position = "bottom") +
  guides(
    color = guide_legend(
    nrow = 1,
    override.aes = list(size = 4)
  )
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(diamonds, aes(carat, price)) + 
  geom_bin2d()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).

ggplot(diamonds, aes(log10(carat), log10(price))) + 
  geom_bin2d()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).

ggplot(diamonds, aes(carat, price)) + 
  geom_bin2d() + 
  scale_x_log10() + 
  scale_y_log10()
## Warning: Removed 1 rows containing non-finite values (stat_bin2d).

ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv))

ggplot(mpg, aes(displ, hwy)) +
geom_point(aes(color = drv)) +
scale_color_brewer(palette = "Set1")

ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = drv, shape = drv)) +
  scale_color_brewer(palette = "Set1")

presidential %>%
  mutate(id = 33 + row_number()) %>%
  ggplot(aes(start, id, color = party)) + 
  geom_point() + 
  geom_segment(aes(xend = end, yend = id)) + 
  scale_color_manual(
    values = c(Republican = "red", Democratic = "blue")
  )

df <- tibble(
x = rnorm(10000),
y = rnorm(10000)
)
ggplot(df, aes(x, y)) +
geom_hex() +
coord_fixed()

ggplot(df, aes(x, y)) +
geom_hex() +
viridis::scale_fill_viridis() +
coord_fixed()

ggplot(mpg, mapping = aes(displ, hwy)) +
  geom_point(aes(color = class)) + 
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mpg, mapping = aes(displ, hwy)) +
  geom_point(aes(color = class)) + 
  geom_smooth() + 
  coord_cartesian(xlim = c(5,7), ylim = c(10,30))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

mpg %>%
  filter(displ >= 5, displ <= 7, hwy >= 10, hwy <= 30) %>%
  ggplot(aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

suv <- mpg %>% filter(class == "suv")
compact <- mpg %>% filter(class == "compact")

ggplot(suv, aes(displ, hwy, color = drv)) + geom_point()

ggplot(compact, aes(displ, hwy, color = drv)) + geom_point()

x_scale <- scale_x_continuous(limits = range(mpg$displ))
y_scale <- scale_y_continuous(limits = range(mpg$hwy))
col_scale <- scale_color_discrete(limits = unique(mpg$drv))

ggplot(suv, aes(displ, hwy, color = drv)) + 
  geom_point() + 
  x_scale + 
  y_scale + 
  col_scale

ggplot(compact, aes(displ, hwy, color = drv)) + 
  geom_point() + 
  x_scale + 
  y_scale + 
  col_scale

ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_smooth() + 
  theme_bw()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mpg, aes(displ, hwy)) + 
  geom_point(aes(color = class)) + 
  geom_smooth() + 
  theme_dark()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggplot(mpg, aes(displ, hwy)) + geom_point()

# ggsave("my_plot.pdf)
# fig.width, fig.height, fig.asp, out.width, out.height

23. R markdown formats

# --- 
# title: "viridis Demo"
# output: html_document
# ---
# rmarkdown::render(
#  "diamond-sizes.Rmd",
#  output_format = "word_document"
# )
# flexdashboard
# htmlwidgets
library(leaflet)
leaflet() %>%
setView(174.764, -36.877, zoom = 16) %>%
addTiles() %>%
addMarkers(174.764, -36.877, popup = "Maungawhau")
library(shiny)
textInput("name", "What is your name?")
numericInput("age", "How old are you?", NA, min = 0, max = 150)

24. R markdown workflow

#